diff --git a/android/MLCChat/README.md b/android/MLCChat/README.md new file mode 100644 index 0000000000..445d09a659 --- /dev/null +++ b/android/MLCChat/README.md @@ -0,0 +1,6 @@ +# MLC-LLM Android + +Checkout [Documentation page](https://llm.mlc.ai/docs/deploy/android.html) for more information. + +- run `mlc_llm package` +- open this `MLCChat/` folder as a project in Android Studio diff --git a/android/app/.gitignore b/android/MLCChat/app/.gitignore similarity index 100% rename from android/app/.gitignore rename to android/MLCChat/app/.gitignore diff --git a/android/app/build.gradle b/android/MLCChat/app/build.gradle similarity index 98% rename from android/app/build.gradle rename to android/MLCChat/app/build.gradle index 1fd30e3985..47b2915460 100644 --- a/android/app/build.gradle +++ b/android/MLCChat/app/build.gradle @@ -47,7 +47,7 @@ android { } dependencies { - implementation project(":library") + implementation project(":mlc4j") implementation 'androidx.core:core-ktx:1.10.1' implementation 'androidx.lifecycle:lifecycle-runtime-ktx:2.6.1' implementation 'androidx.activity:activity-compose:1.7.1' diff --git a/android/app/proguard-rules.pro b/android/MLCChat/app/proguard-rules.pro similarity index 100% rename from android/app/proguard-rules.pro rename to android/MLCChat/app/proguard-rules.pro diff --git a/android/app/src/main/AndroidManifest.xml b/android/MLCChat/app/src/main/AndroidManifest.xml similarity index 100% rename from android/app/src/main/AndroidManifest.xml rename to android/MLCChat/app/src/main/AndroidManifest.xml diff --git a/android/app/src/main/ic_launcher-playstore.png b/android/MLCChat/app/src/main/ic_launcher-playstore.png similarity index 100% rename from android/app/src/main/ic_launcher-playstore.png rename to android/MLCChat/app/src/main/ic_launcher-playstore.png diff --git a/android/app/src/main/java/ai/mlc/mlcchat/AppViewModel.kt b/android/MLCChat/app/src/main/java/ai/mlc/mlcchat/AppViewModel.kt similarity index 99% rename from android/app/src/main/java/ai/mlc/mlcchat/AppViewModel.kt rename to android/MLCChat/app/src/main/java/ai/mlc/mlcchat/AppViewModel.kt index 6a3bf4a211..cd8b23ce08 100644 --- a/android/app/src/main/java/ai/mlc/mlcchat/AppViewModel.kt +++ b/android/MLCChat/app/src/main/java/ai/mlc/mlcchat/AppViewModel.kt @@ -38,7 +38,7 @@ class AppViewModel(application: Application) : AndroidViewModel(application) { private val modelIdSet = emptySet().toMutableSet() companion object { - const val AppConfigFilename = "app-config.json" + const val AppConfigFilename = "mlc-app-config.json" const val ModelConfigFilename = "mlc-chat-config.json" const val ParamsConfigFilename = "ndarray-cache.json" const val ModelUrlSuffix = "resolve/main/" diff --git a/android/app/src/main/java/ai/mlc/mlcchat/ChatView.kt b/android/MLCChat/app/src/main/java/ai/mlc/mlcchat/ChatView.kt similarity index 100% rename from android/app/src/main/java/ai/mlc/mlcchat/ChatView.kt rename to android/MLCChat/app/src/main/java/ai/mlc/mlcchat/ChatView.kt diff --git a/android/app/src/main/java/ai/mlc/mlcchat/MainActivity.kt b/android/MLCChat/app/src/main/java/ai/mlc/mlcchat/MainActivity.kt similarity index 100% rename from android/app/src/main/java/ai/mlc/mlcchat/MainActivity.kt rename to android/MLCChat/app/src/main/java/ai/mlc/mlcchat/MainActivity.kt diff --git a/android/app/src/main/java/ai/mlc/mlcchat/NavView.kt b/android/MLCChat/app/src/main/java/ai/mlc/mlcchat/NavView.kt similarity index 100% rename from android/app/src/main/java/ai/mlc/mlcchat/NavView.kt rename to 
android/MLCChat/app/src/main/java/ai/mlc/mlcchat/NavView.kt diff --git a/android/app/src/main/java/ai/mlc/mlcchat/StartView.kt b/android/MLCChat/app/src/main/java/ai/mlc/mlcchat/StartView.kt similarity index 100% rename from android/app/src/main/java/ai/mlc/mlcchat/StartView.kt rename to android/MLCChat/app/src/main/java/ai/mlc/mlcchat/StartView.kt diff --git a/android/app/src/main/java/ai/mlc/mlcchat/ui/theme/Color.kt b/android/MLCChat/app/src/main/java/ai/mlc/mlcchat/ui/theme/Color.kt similarity index 100% rename from android/app/src/main/java/ai/mlc/mlcchat/ui/theme/Color.kt rename to android/MLCChat/app/src/main/java/ai/mlc/mlcchat/ui/theme/Color.kt diff --git a/android/app/src/main/java/ai/mlc/mlcchat/ui/theme/Theme.kt b/android/MLCChat/app/src/main/java/ai/mlc/mlcchat/ui/theme/Theme.kt similarity index 100% rename from android/app/src/main/java/ai/mlc/mlcchat/ui/theme/Theme.kt rename to android/MLCChat/app/src/main/java/ai/mlc/mlcchat/ui/theme/Theme.kt diff --git a/android/app/src/main/java/ai/mlc/mlcchat/ui/theme/Type.kt b/android/MLCChat/app/src/main/java/ai/mlc/mlcchat/ui/theme/Type.kt similarity index 100% rename from android/app/src/main/java/ai/mlc/mlcchat/ui/theme/Type.kt rename to android/MLCChat/app/src/main/java/ai/mlc/mlcchat/ui/theme/Type.kt diff --git a/android/app/src/main/res/drawable/ic_android_black_24dp.xml b/android/MLCChat/app/src/main/res/drawable/ic_android_black_24dp.xml similarity index 100% rename from android/app/src/main/res/drawable/ic_android_black_24dp.xml rename to android/MLCChat/app/src/main/res/drawable/ic_android_black_24dp.xml diff --git a/android/app/src/main/res/drawable/mlc_logo_108.xml b/android/MLCChat/app/src/main/res/drawable/mlc_logo_108.xml similarity index 100% rename from android/app/src/main/res/drawable/mlc_logo_108.xml rename to android/MLCChat/app/src/main/res/drawable/mlc_logo_108.xml diff --git a/android/app/src/main/res/values/colors.xml b/android/MLCChat/app/src/main/res/values/colors.xml similarity index 100% rename from android/app/src/main/res/values/colors.xml rename to android/MLCChat/app/src/main/res/values/colors.xml diff --git a/android/app/src/main/res/values/strings.xml b/android/MLCChat/app/src/main/res/values/strings.xml similarity index 100% rename from android/app/src/main/res/values/strings.xml rename to android/MLCChat/app/src/main/res/values/strings.xml diff --git a/android/app/src/main/res/values/themes.xml b/android/MLCChat/app/src/main/res/values/themes.xml similarity index 100% rename from android/app/src/main/res/values/themes.xml rename to android/MLCChat/app/src/main/res/values/themes.xml diff --git a/android/app/src/main/res/xml/backup_rules.xml b/android/MLCChat/app/src/main/res/xml/backup_rules.xml similarity index 100% rename from android/app/src/main/res/xml/backup_rules.xml rename to android/MLCChat/app/src/main/res/xml/backup_rules.xml diff --git a/android/app/src/main/res/xml/data_extraction_rules.xml b/android/MLCChat/app/src/main/res/xml/data_extraction_rules.xml similarity index 100% rename from android/app/src/main/res/xml/data_extraction_rules.xml rename to android/MLCChat/app/src/main/res/xml/data_extraction_rules.xml diff --git a/android/build.gradle b/android/MLCChat/build.gradle similarity index 100% rename from android/build.gradle rename to android/MLCChat/build.gradle diff --git a/android/MLCChat/bundle_weight.py b/android/MLCChat/bundle_weight.py new file mode 100644 index 0000000000..adade13071 --- /dev/null +++ b/android/MLCChat/bundle_weight.py @@ -0,0 +1,65 @@ +import argparse 
+import os +import subprocess +from pathlib import Path + +from mlc_llm.support import logging + +logging.enable_logging() +logger = logging.getLogger(__name__) + + +def main(apk_path: Path, package_output_path: Path): + """Push weights to the Android device with adb""" + # - Install the apk on device. + logger.info('Install apk "%s" to device', str(apk_path.absolute())) + subprocess.run(["adb", "install", str(apk_path)], check=True, env=os.environ) + # - Create the weight directory for the app. + device_weight_dir = "/storage/emulated/0/Android/data/ai.mlc.mlcchat/files/" + logger.info('Creating directory "%s" on device', device_weight_dir) + subprocess.run( + ["adb", "shell", "mkdir", "-p", device_weight_dir], + check=True, + env=os.environ, + ) + for model_weight_dir in (package_output_path / "bundle").iterdir(): + if model_weight_dir.is_dir(): + src_path = str(model_weight_dir.absolute()) + dst_path = "/data/local/tmp/" + model_weight_dir.name + logger.info('Pushing local weights "%s" to device location "%s"', src_path, dst_path) + subprocess.run(["adb", "push", src_path, dst_path], check=True, env=os.environ) + + src_path = dst_path + dst_path = "/storage/emulated/0/Android/data/ai.mlc.mlcchat/files/" + logger.info('Move weights from "%s" to "%s"', src_path, dst_path) + subprocess.run(["adb", "shell", "mv", src_path, dst_path], check=True, env=os.environ) + logger.info("All finished.") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("MLC LLM Android Weight Bundle") + + def _parse_apk_path(path: str) -> Path: + path = Path(path) + if not path.exists(): + raise ValueError( + f"Path {str(path)} is expected to be an apk file, but the file does not exist." + ) + if not path.is_file(): + raise ValueError(f"Path {str(path)} is expected to be an apk file.") + return path + + parser.add_argument( + "--apk-path", + type=_parse_apk_path, + default="app/release/app-release.apk", + help="The path to the generated MLCChat apk file.", + ) + parser.add_argument( + "--package-output-path", + type=Path, + default="dist", + help='The path to the output directory of "mlc_llm package".', + ) + args = parser.parse_args() + main(args.apk_path, args.package_output_path) diff --git a/android/gradle.properties b/android/MLCChat/gradle.properties similarity index 100% rename from android/gradle.properties rename to android/MLCChat/gradle.properties diff --git a/android/gradle/wrapper/gradle-wrapper.jar b/android/MLCChat/gradle/wrapper/gradle-wrapper.jar similarity index 100% rename from android/gradle/wrapper/gradle-wrapper.jar rename to android/MLCChat/gradle/wrapper/gradle-wrapper.jar diff --git a/android/gradle/wrapper/gradle-wrapper.properties b/android/MLCChat/gradle/wrapper/gradle-wrapper.properties similarity index 100% rename from android/gradle/wrapper/gradle-wrapper.properties rename to android/MLCChat/gradle/wrapper/gradle-wrapper.properties diff --git a/android/gradlew b/android/MLCChat/gradlew similarity index 100% rename from android/gradlew rename to android/MLCChat/gradlew diff --git a/android/gradlew.bat b/android/MLCChat/gradlew.bat similarity index 100% rename from android/gradlew.bat rename to android/MLCChat/gradlew.bat diff --git a/android/MLCChat/mlc-package-config.json b/android/MLCChat/mlc-package-config.json new file mode 100644 index 0000000000..766d6d2a80 --- /dev/null +++ b/android/MLCChat/mlc-package-config.json @@ -0,0 +1,38 @@ +{ + "device": "android", + "model_list": [ + { + "model": "HF://mlc-ai/gemma-2b-it-q4f16_1-MLC", + "model_id": "gemma-2b-q4f16_1", +
"estimated_vram_bytes": 3000000000 + }, + { + "model": "HF://mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC", + "estimated_vram_bytes": 4348727787, + "model_id": "Llama-2-7b-chat-hf-q4f16_1", + "overrides": { + "context_window_size": 768, + "prefill_chunk_size": 256 + } + }, + { + "model": "HF://mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", + "estimated_vram_bytes": 1948348579, + "model_id": "RedPajama-INCITE-Chat-3B-v1-q4f16_1" + }, + { + "model": "HF://mlc-ai/Mistral-7B-Instruct-v0.2-q4f16_1-MLC", + "estimated_vram_bytes": 4275453296, + "model_id": "Mistral-7B-Instruct-v0.2-q4f16_1", + "overrides": { + "sliding_window_size": 768, + "prefill_chunk_size": 256 + } + }, + { + "model": "HF://mlc-ai/phi-2-q4f16_1-MLC", + "estimated_vram_bytes": 2036816936, + "model_id": "phi-2-q4f16_1" + } + ] +} diff --git a/android/settings.gradle b/android/MLCChat/settings.gradle similarity index 82% rename from android/settings.gradle rename to android/MLCChat/settings.gradle index 31e8cf1d87..6866480997 100644 --- a/android/settings.gradle +++ b/android/MLCChat/settings.gradle @@ -14,4 +14,5 @@ dependencyResolutionManagement { } rootProject.name = "MLCChat" include ':app' -include ':library' +include ':mlc4j' +project(':mlc4j').projectDir = file('dist/lib/mlc4j') diff --git a/android/library/prepare_libs.sh b/android/library/prepare_libs.sh deleted file mode 100755 index c089927d09..0000000000 --- a/android/library/prepare_libs.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -set -euxo pipefail - -rustup target add aarch64-linux-android - -mkdir -p build/model_lib - -python3 prepare_model_lib.py - -cd build -touch config.cmake -if [ ${TVM_HOME-0} -ne 0 ]; then - echo "set(TVM_HOME ${TVM_HOME})" >> config.cmake -fi - -cmake .. \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \ - -DCMAKE_INSTALL_PREFIX=. \ - -DCMAKE_CXX_FLAGS="-O3" \ - -DANDROID_ABI=arm64-v8a \ - -DANDROID_NATIVE_API_LEVEL=android-24 \ - -DANDROID_PLATFORM=android-24 \ - -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=ON \ - -DANDROID_STL=c++_static \ - -DUSE_HEXAGON_SDK=OFF \ - -DMLC_LLM_INSTALL_STATIC_LIB=ON \ - -DCMAKE_SKIP_INSTALL_ALL_DEPENDENCY=ON \ - -DUSE_OPENCL=ON \ - -DUSE_OPENCL_ENABLE_HOST_PTR=ON \ - -DUSE_CUSTOM_LOGGING=ON \ - -cmake --build . --target tvm4j_runtime_packed --config release -cmake --build . 
--target install --config release -j diff --git a/android/library/prepare_model_lib.py b/android/library/prepare_model_lib.py deleted file mode 100644 index 9f143d7357..0000000000 --- a/android/library/prepare_model_lib.py +++ /dev/null @@ -1,79 +0,0 @@ -import json -import os - -from tvm.contrib import ndk - - -def get_model_libs(lib_path): - global_symbol_map = ndk.get_global_symbol_section_map(lib_path) - libs = [] - suffix = "___tvm_dev_mblob" - for name in global_symbol_map.keys(): - if name.endswith(suffix): - model_lib = name[: -len(suffix)] - if model_lib.startswith("_"): - model_lib = model_lib[1:] - libs.append(model_lib) - return libs - - -def main(): - app_config_path = "src/main/assets/app-config.json" - app_config = json.load(open(app_config_path, "r")) - artifact_path = os.path.abspath(os.path.join("../..", "dist")) - tar_list = [] - model_set = set() - - for model, model_lib in app_config["model_lib_path_for_prepare_libs"].items(): - path = os.path.join(artifact_path, model_lib) - if not os.path.isfile(path): - raise RuntimeError(f"Cannot find android library {path}") - tar_list.append(path) - model_set.add(model) - - lib_path = os.path.join("build", "model_lib", "libmodel_android.a") - ndk.create_staticlib(lib_path, tar_list) - print(f"Creating lib from {tar_list}..") - - available_model_libs = get_model_libs(lib_path) - print(f"Validating the library {lib_path}...") - print( - f"List of available model libs packaged: {available_model_libs}," - " if we have '-' in the model_lib string, it will be turned into '_'" - ) - global_symbol_map = ndk.get_global_symbol_section_map(lib_path) - error_happened = False - for item in app_config["model_list"]: - model_lib = item["model_lib"] - model_id = item["model_id"] - if model_lib not in model_set: - print( - f"ValidationError: model_lib={model_lib} specified for model_id={model_id} " - "is not included in model_lib_path_for_prepare_libs field, " - "This will cause the specific model not being able to load, " - f"please check {app_config_path}." 
- ) - error_happened = True - model_prefix_pattern = model_lib.replace("-", "_") + "___tvm_dev_mblob" - if ( - model_prefix_pattern not in global_symbol_map - and "_" + model_prefix_pattern not in global_symbol_map - ): - model_lib = app_config["model_lib_path_for_prepare_libs"][model_lib] - print( - "ValidationError:\n" - f"\tmodel_lib {model_lib} requested in {app_config_path} is not found in {lib_path}\n" - f"\tspecifically the model_lib for {model_lib} in model_lib_path_for_prepare_libs.\n" - f"\tcurrent available model_libs in {lib_path}: {available_model_libs}" - ) - error_happened = True - - if not error_happened: - print("Validation pass") - else: - print("Validation failed") - exit(255) - - -if __name__ == "__main__": - main() diff --git a/android/library/src/main/assets/app-config.json b/android/library/src/main/assets/app-config.json deleted file mode 100644 index 68442c234e..0000000000 --- a/android/library/src/main/assets/app-config.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "model_list": [ - { - "model_url": "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC", - "model_id": "gemma-2b-q4f16_1", - "model_lib": "gemma_q4f16_1", - "estimated_vram_bytes": 3000000000 - }, - { - "model_url": "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC/", - "model_lib": "llama_q4f16_1", - "estimated_vram_bytes": 4348727787, - "model_id": "Llama-2-7b-chat-hf-q4f16_1" - }, - { - "model_url": "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC/", - "model_lib": "gpt_neox_q4f16_1", - "estimated_vram_bytes": 1948348579, - "model_id": "RedPajama-INCITE-Chat-3B-v1-q4f16_1" - }, - { - "model_url": "https://huggingface.co/mlc-ai/Mistral-7B-Instruct-v0.2-q4f16_1-MLC", - "model_lib": "mistral_q4f16_1", - "estimated_vram_bytes": 4275453296, - "model_id": "Mistral-7B-Instruct-v0.2-q4f16_1" - }, - { - "model_url": "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC", - "model_lib": "phi_msft_q4f16_1", - "estimated_vram_bytes": 2036816936, - "model_id": "phi-2-q4f16_1" - } - ], - "model_lib_path_for_prepare_libs": { - "gemma_q4f16_1": "prebuilt/lib/gemma-2b-it/gemma-2b-it-q4f16_1-android.tar", - "llama_q4f16_1": "prebuilt/lib/Llama-2-7b-chat-hf/Llama-2-7b-chat-hf-q4f16_1-android.tar", - "gpt_neox_q4f16_1": "prebuilt/lib/RedPajama-INCITE-Chat-3B-v1/RedPajama-INCITE-Chat-3B-v1-q4f16_1-android.tar", - "phi_msft_q4f16_1": "prebuilt/lib/phi-2/phi-2-q4f16_1-android.tar", - "mistral_q4f16_1": "prebuilt/lib/Mistral-7B-Instruct-v0.2/Mistral-7B-Instruct-v0.2-q4f16_1-android.tar" - } -} \ No newline at end of file diff --git a/android/library/.gitignore b/android/mlc4j/.gitignore similarity index 100% rename from android/library/.gitignore rename to android/mlc4j/.gitignore diff --git a/android/library/CMakeLists.txt b/android/mlc4j/CMakeLists.txt similarity index 97% rename from android/library/CMakeLists.txt rename to android/mlc4j/CMakeLists.txt index a7d5a1caf0..f4ce6f218d 100644 --- a/android/library/CMakeLists.txt +++ b/android/mlc4j/CMakeLists.txt @@ -37,7 +37,7 @@ add_custom_command( ) add_library(model_android STATIC IMPORTED) -set_target_properties(model_android PROPERTIES IMPORTED_LOCATION ${ANDROID_BIN_DIR}/model_lib/libmodel_android.a) +set_target_properties(model_android PROPERTIES IMPORTED_LOCATION ${ANDROID_BIN_DIR}/lib/libmodel_android.a) add_library(tvm4j_runtime_packed SHARED ${TVM_HOME}/jvm/native/src/main/native/org_apache_tvm_native_c_api.cc) diff --git a/android/library/build.gradle b/android/mlc4j/build.gradle similarity index 84% rename from 
android/library/build.gradle rename to android/mlc4j/build.gradle index 8e4a1b8408..a9058fd827 100644 --- a/android/library/build.gradle +++ b/android/mlc4j/build.gradle @@ -19,13 +19,13 @@ android { } sourceSets { main { - jniLibs.srcDirs = ['build/output'] + jniLibs.srcDirs = ['output'] } } } dependencies { - implementation fileTree(dir: 'build/output', include: ['*.jar']) + implementation fileTree(dir: 'output', include: ['*.jar']) implementation 'androidx.core:core-ktx:1.9.0' implementation 'androidx.appcompat:appcompat:1.6.1' implementation 'com.google.android.material:material:1.10.0' diff --git a/android/mlc4j/prepare_libs.py b/android/mlc4j/prepare_libs.py new file mode 100644 index 0000000000..19f80718f0 --- /dev/null +++ b/android/mlc4j/prepare_libs.py @@ -0,0 +1,90 @@ +"""The build script for mlc4j (MLC LLM and tvm4j)""" + +import argparse +import os +import subprocess +from pathlib import Path + +from mlc_llm.support import logging + +logging.enable_logging() +logger = logging.getLogger(__name__) + + +def run_cmake(mlc4j_path: Path): + if "ANDROID_NDK" not in os.environ: + raise ValueError( + f'Environment variable "ANDROID_NDK" is required but not found.' + "Please follow https://llm.mlc.ai/docs/deploy/android.html to properly " + 'specify "ANDROID_NDK".' + ) + logger.info("Running cmake") + cmd = [ + "cmake", + str(mlc4j_path), + "-DCMAKE_BUILD_TYPE=Release", + f"-DCMAKE_TOOLCHAIN_FILE={os.environ['ANDROID_NDK']}/build/cmake/android.toolchain.cmake", + "-DCMAKE_INSTALL_PREFIX=.", + '-DCMAKE_CXX_FLAGS="-O3"', + "-DANDROID_ABI=arm64-v8a", + "-DANDROID_NATIVE_API_LEVEL=android-24", + "-DANDROID_PLATFORM=android-24", + "-DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=ON", + "-DANDROID_STL=c++_static", + "-DUSE_HEXAGON_SDK=OFF", + "-DMLC_LLM_INSTALL_STATIC_LIB=ON", + "-DCMAKE_SKIP_INSTALL_ALL_DEPENDENCY=ON", + "-DUSE_OPENCL=ON", + "-DUSE_OPENCL_ENABLE_HOST_PTR=ON", + "-DUSE_CUSTOM_LOGGING=ON", + ] + subprocess.run(cmd, check=True, env=os.environ) + + +def run_cmake_build(): + logger.info("Running cmake build") + cmd = ["cmake", "--build", ".", "--target", "tvm4j_runtime_packed", "--config", "release"] + subprocess.run(cmd, check=True, env=os.environ) + + +def run_cmake_install(): + logger.info("Running cmake install") + cmd = ["cmake", "--build", ".", "--target", "install", "--config", "release", "-j"] + subprocess.run(cmd, check=True, env=os.environ) + + +def main(mlc_llm_home: Path): + # - Setup rust. + subprocess.run(["rustup", "target", "add", "aarch64-linux-android"], check=True, env=os.environ) + + # - Build MLC LLM and tvm4j. + build_path = Path("build") + os.makedirs(build_path / "lib", exist_ok=True) + logger.info('Entering "%s" for MLC LLM and tvm4j build.', os.path.abspath(build_path)) + os.chdir(build_path) + # Generate config.cmake if TVM Home is set. 
+ if "TVM_HOME" in os.environ: + logger.info('Set TVM_HOME to "%s"', os.environ["TVM_HOME"]) + with open("config.cmake", "w", encoding="utf-8") as file: + print("set(TVM_HOME %s)" % os.environ["TVM_HOME"], file=file) + + # - Run cmake, build and install + run_cmake(mlc_llm_home / "android" / "mlc4j") + run_cmake_build() + run_cmake_install() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("MLC LLM Android Lib Preparation") + + parser.add_argument( + "--mlc-llm-home", + type=Path, + default=os.environ.get("MLC_LLM_HOME", None), + help="The path to MLC LLM source", + ) + parsed = parser.parse_args() + if parsed.mlc_llm_home is None: + parsed.mlc_llm_home = Path(os.path.abspath(os.path.curdir)).parent.parent + os.environ["MLC_LLM_HOME"] = str(parsed.mlc_llm_home) + main(parsed.mlc_llm_home) diff --git a/android/library/src/cpp/tvm_runtime.h b/android/mlc4j/src/cpp/tvm_runtime.h similarity index 100% rename from android/library/src/cpp/tvm_runtime.h rename to android/mlc4j/src/cpp/tvm_runtime.h diff --git a/android/library/src/main/AndroidManifest.xml b/android/mlc4j/src/main/AndroidManifest.xml similarity index 100% rename from android/library/src/main/AndroidManifest.xml rename to android/mlc4j/src/main/AndroidManifest.xml diff --git a/android/library/src/main/java/ai/mlc/mlcllm/ChatModule.java b/android/mlc4j/src/main/java/ai/mlc/mlcllm/ChatModule.java similarity index 100% rename from android/library/src/main/java/ai/mlc/mlcllm/ChatModule.java rename to android/mlc4j/src/main/java/ai/mlc/mlcllm/ChatModule.java diff --git a/docs/compilation/package_model_libraries_weights.rst b/docs/compilation/package_model_libraries_weights.rst new file mode 100644 index 0000000000..0bab235eb4 --- /dev/null +++ b/docs/compilation/package_model_libraries_weights.rst @@ -0,0 +1,208 @@ +.. _package-model-libraries-weights: + +Package Model Libraries & Weights +================================= + +When we want to build LLM applications with MLC LLM (e.g., iOS/Android apps), +usually we need to build static model libraries and app binding libraries, +and sometimes bundle model weights into the app. +MLC LLM provides a tool for fast model library and weight packaging: ``mlc_llm package``. + +This page briefly introduces how to use ``mlc_llm package`` for packaging. +Tutorials :ref:`deploy-ios` and :ref:`deploy-android` contain detailed examples and instructions +on using this packaging tool for iOS and Android deployment. + +----- + +Introduction +------------ + +To use ``mlc_llm package``, we must clone the source code of `MLC LLM `_ +and `install the MLC LLM and TVM Unity package `_. +Depending on the app we build, there might be some other dependencies, which are described in +corresponding :ref:`iOS ` and :ref:`Android ` tutorials. + +After cloning, the basic usage of ``mlc_llm package`` is as follows. + +.. code:: bash + + export MLC_LLM_HOME=/path/to/mlc-llm + cd /path/to/app # The app root directory which contains "mlc-package-config.json". + # E.g., "ios/MLCChat" or "android/MLCChat" + mlc_llm package + +**The package command reads from the JSON file** ``mlc-package-config.json`` **under the current directory.** +The output of this command is a directory ``dist/``, +which contains the packaged model libraries (under ``dist/lib/``) and weights (under ``dist/bundle/``). +This directory contains all necessary data for the app build. +Depending on the app we build, the internal structure of ``dist/lib/`` may be different. +
+.. code:: + + dist + ├── lib + │ └── ... 
+ └── bundle + └── ... + +The input ``mlc-package-config.json`` file specifies + +* the device (e.g., iPhone or Android) to package model libraries and weights for, +* the list of models to package. + +Below is an example ``mlc-package-config.json`` file: + +.. code:: json + + { + "device": "iphone", + "model_list": [ + { + "model": "HF://mlc-ai/Mistral-7B-Instruct-v0.2-q3f16_1-MLC", + "model_id": "Mistral-7B-Instruct-v0.2-q3f16_1", + "estimated_vram_bytes": 3316000000, + "bundle_weight": true, + "overrides": { + "context_window_size": 512 + } + }, + { + "model": "HF://mlc-ai/gemma-2b-it-q4f16_1-MLC", + "model_id": "gemma-2b-q4f16_1", + "estimated_vram_bytes": 3000000000, + "overrides": { + "prefill_chunk_size": 128 + } + } + ] + } + +This example ``mlc-package-config.json`` specifies "iphone" as the target device. +In the ``model_list``, + +* ``model`` points to the Hugging Face repository which contains the pre-converted model weights. Apps will download model weights from the Hugging Face URL. +* ``model_id`` is a unique model identifier. +* ``estimated_vram_bytes`` is an estimation of the vRAM the model takes at runtime. +* ``"bundle_weight": true`` means the model weights of the model will be bundled into the app when building. +* ``overrides`` specifies some model config parameter overrides. + + +Below is a more detailed specification of the ``mlc-package-config.json`` file. +Each entry in ``"model_list"`` of the JSON file has the following fields: + +``model`` + (Required) The path to the MLC-converted model to be built into the app. + + Usually it is a Hugging Face URL (e.g., ``"model": "HF://mlc-ai/phi-2-q4f16_1-MLC"``) that contains the pre-converted model weights. + For iOS, it can also be a path to a local model directory which contains converted model weights (e.g., ``"model": "../dist/gemma-2b-q4f16_1"``). + Please check out :ref:`convert-weights-via-MLC` if you want to build a local model into the app. + +``model_id`` + (Required) A unique local identifier to identify the model. + It can be an arbitrary string. + +``estimated_vram_bytes`` + (Required) Estimated requirements of vRAM to run the model. + +``bundle_weight`` + (Optional) A boolean flag indicating whether to bundle model weights into the app. + If this field is set to true, the ``mlc_llm package`` command will copy the model weights + to ``dist/bundle/$model_id``. + +``overrides`` + (Optional) A dictionary to override the default model context window size (to limit the KV cache size) and prefill chunk size (to limit the model temporary execution memory). + Example: + + .. code:: json + + { + "device": "iphone", + "model_list": [ + { + "model": "HF://mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", + "model_id": "RedPajama-INCITE-Chat-3B-v1-q4f16_1", + "estimated_vram_bytes": 2960000000, + "overrides": { + "context_window_size": 512, + "prefill_chunk_size": 128 + } + } + ] + } + +``model_lib`` + (Optional) A string specifying the system library prefix to use for the model. + Usually this is used when you want to build multiple model variants with the same architecture into the app. + **This field does not affect any app functionality.** + The ``"model_lib_path_for_prepare_libs"`` introduced below is also related. + Example: + + .. 
code:: json + + { + "device": "iphone", + "model_list": [ + { + "model": "HF://mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", + "model_id": "RedPajama-INCITE-Chat-3B-v1-q4f16_1", + "estimated_vram_bytes": 2960000000, + "model_lib": "gpt_neox_q4f16_1" + } + ] + } + + +Besides ``model_list`` in ``MLCChat/mlc-package-config.json``, +you can also **optionally** specify a dictionary of ``"model_lib_path_for_prepare_libs"``, +**if you want to use model libraries that are manually compiled**. +The keys of this dictionary should be the ``model_lib`` strings specified in the model list, +and the values of this dictionary are the paths (absolute or relative) to the manually compiled model libraries. +The model libraries specified in ``"model_lib_path_for_prepare_libs"`` will be built into the app when running ``mlc_llm package``. +Example: + +.. code:: json + + { + "device": "iphone", + "model_list": [ + { + "model": "HF://mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", + "model_id": "RedPajama-INCITE-Chat-3B-v1-q4f16_1", + "estimated_vram_bytes": 2960000000, + "model_lib": "gpt_neox_q4f16_1" + } + ], + "model_lib_path_for_prepare_libs": { + "gpt_neox_q4f16_1": "../../dist/lib/RedPajama-INCITE-Chat-3B-v1-q4f16_1-iphone.tar" + } + } + + +Arguments of ``mlc_llm package`` +-------------------------------- + +Command ``mlc_llm package`` can optionally take the arguments below: + +``--package-config`` + A path to ``mlc-package-config.json`` which contains the device and model specification. + By default, it is the ``mlc-package-config.json`` under the current directory. + +``--mlc-llm-home`` + The path to MLC LLM source code (cloned from https://github.com/mlc-ai/mlc-llm). + By default, it is the ``$MLC_LLM_HOME`` environment variable. + If neither ``$MLC_LLM_HOME`` nor ``--mlc-llm-home`` is specified, an error will be reported. + +``--output`` / ``-o`` + The output directory of the ``mlc_llm package`` command. + By default, it is ``dist/`` under the current directory. + + +Summary and What to Do Next +--------------------------- + +On this page, we introduced the ``mlc_llm package`` command for fast model library and weight packaging. + +* It takes the input file ``mlc-package-config.json``, which contains the device and model specification for packaging. +* It outputs the directory ``dist/``, which contains packaged libraries under ``dist/lib/`` and model weights under ``dist/bundle/``. + +Next, please feel free to check out the :ref:`iOS ` and :ref:`Android ` tutorials for detailed examples of using ``mlc_llm package``. diff --git a/docs/deploy/android.rst b/docs/deploy/android.rst index a9b2fcb18f..0a0d66b704 100644 --- a/docs/deploy/android.rst +++ b/docs/deploy/android.rst @@ -1,6 +1,6 @@ .. _deploy-android: -Android App +Android SDK =========== .. contents:: Table of Contents :local: @@ -35,11 +35,14 @@ Prerequisite ANDROID_NDK: $HOME/Library/Android/sdk/ndk/25.2.9519653 TVM_NDK_CC: $ANDROID_NDK/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android24-clang -**JDK**, such as OpenJDK >= 17, to compile Java bindings of TVM Unity runtime. It could be installed via Homebrew on macOS, apt on Ubuntu or other package managers. Set up the following environment variable: +**JDK**, such as OpenJDK >= 17, to compile Java bindings of TVM Unity runtime. +We recommend setting ``JAVA_HOME`` to the JDK bundled with Android Studio, e.g., ``export JAVA_HOME=/Applications/Android\ Studio.app/Contents/jbr/Contents/Home`` on macOS. 
+Alternatively, it can be installed via Homebrew on macOS, apt on Ubuntu, or other package managers. +Set up the following environment variable: - ``JAVA_HOME`` so that Java is available in ``$JAVA_HOME/bin/java``. -Please ensure that the JDK versions for Android Studio and JAVA_HOME are the same. We recommended setting the `JAVA_HOME` to the JDK bundled with Android Studio. e.g. `export JAVA_HOME=/Applications/Android\ Studio.app/Contents/jbr/Contents/Home` for macOS. +Please ensure that the JDK versions for Android Studio and JAVA_HOME are the same. **TVM Unity runtime** is placed under `3rdparty/tvm `__ in MLC LLM, so there is no need to install anything extra. Set up the following environment variable: @@ -60,128 +63,258 @@ Check if **environment variable** are properly set as the last check. One way to export JAVA_HOME=... # Java export TVM_HOME=... # TVM Unity runtime -Compile PyTorch Models from HuggingFace ---------------------------------------- -To deploy models on Android with reasonable performance, one has to cross-compile to and fully utilize mobile GPUs using TVM Unity. MLC provides a few pre-compiled models, or one could compile the models on their own. +Build Android App from Source +----------------------------- -**Cloning MLC LLM from GitHub**. Download MLC LLM via the following command: +This section shows how we can build the app from source. -.. code-block:: bash +Step 1. Install Build Dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - git clone --recursive https://github.com/mlc-ai/mlc-llm/ - ^^^^^^^^^^^ - cd ./mlc-llm/ +First and foremost, please clone the `MLC LLM GitHub repository `_. +After cloning, go to the ``android/`` directory. -.. note:: - ❗ The ``--recursive`` flag is necessary to download submodules like `3rdparty/tvm `__. If you see any file missing during compilation, please double check if git submodules are properly cloned. +.. code:: bash -**Download the PyTorch model** using Git Large File Storage (LFS), and by default, under ``./dist/models/``: + git clone https://github.com/mlc-ai/mlc-llm.git + cd mlc-llm + git submodule update --init --recursive + cd android -.. code-block:: bash - MODEL_NAME=Llama-2-7b-chat-hf - QUANTIZATION=q4f16_1 +.. _android-build-runtime-and-model-libraries: - git lfs install - git clone https://huggingface.co/meta-llama/$MODEL_NAME \ - ./dist/models/ +Step 2. Build Runtime and Model Libraries +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -**Compile Android-capable models**. Install TVM Unity compiler as a Python package, and then compile the model for android using the following commands: +The models to be built for the Android app are specified in ``MLCChat/mlc-package-config.json``: +in the ``model_list`` (an example entry is shown below), -.. code-block:: bash +* ``model`` points to the Hugging Face repository which contains the pre-converted model weights. The Android app will download model weights from the Hugging Face URL. +* ``model_id`` is a unique model identifier. +* ``estimated_vram_bytes`` is an estimation of the vRAM the model takes at runtime. +* ``"bundle_weight": true`` means the model weights of the model will be bundled into the app when building. +* ``overrides`` specifies some model config parameter overrides. 
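+For example, the ``MLCChat/mlc-package-config.json`` shipped in this repository contains entries like the one below, excerpted from its gemma-2b entry (see the full file for the other models and their overrides): + +.. code:: json + + { + "device": "android", + "model_list": [ + { + "model": "HF://mlc-ai/gemma-2b-it-q4f16_1-MLC", + "model_id": "gemma-2b-q4f16_1", + "estimated_vram_bytes": 3000000000 + } + ] + } + 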
- # convert weights - mlc_llm convert_weight ./dist/models/$MODEL_NAME/ --quantization $QUANTIZATION -o dist/$MODEL_NAME-$QUANTIZATION-MLC/ - # create mlc-chat-config.json - mlc_llm gen_config ./dist/models/$MODEL_NAME/ --quantization $QUANTIZATION \ - --conv-template llama-2 --context-window-size 768 -o dist/${MODEL_NAME}-${QUANTIZATION}-MLC/ +We have a one-line command to build and prepare all the model libraries: - # 2. compile: compile model library with specification in mlc-chat-config.json - mlc_llm compile ./dist/${MODEL_NAME}-${QUANTIZATION}-MLC/mlc-chat-config.json \ - --device android -o ./dist/${MODEL_NAME}-${QUANTIZATION}-MLC/${MODEL_NAME}-${QUANTIZATION}-android.tar +.. code:: bash -This generates the directory ``./dist/$MODEL_NAME-$QUANTIZATION-MLC`` which contains the necessary components to run the model, as explained below. + cd /path/to/MLCChat # e.g., "android/MLCChat" + export MLC_LLM_HOME=/path/to/mlc-llm # e.g., "../.." + mlc_llm package -.. note:: - ❗ To run 7B models like llama-2-7B, Mistral-7B, it is recommended to use smaller values of parameter ``--context-window-size`` (``--sliding-window-size`` and ``--prefill-chunk-size`` for sliding window attention) to reduce the memory footprint of the model. Default configurations for certains models can be found under the Android tab in the `Compile Models `_ section. - -**Expected output format**. By default models are placed under ``./dist/${MODEL_NAME}-${QUANTIZATION}-MLC``, and the result consists of 3 major components: +This command mainly executes the following two steps: -- Runtime configuration: It configures conversation templates including system prompts, repetition penalty, sampling including temperature and top-p probability, maximum sequence length, etc. It is usually named as ``mlc-chat-config.json`` alongside with tokenizer configurations. -- Model lib: The compiled library that uses mobile GPU. It is usually named as ``${MODEL_NAME}-${QUANTIZATION}-android.tar``, for example, ``Llama-2-7b-chat-hf-q4f16_1-android.tar``. -- Model weights: the model weights are sharded as ``params_shard_*.bin`` and the metadata is stored in ``ndarray-cache.json`` +1. **Compile models.** We compile each model in ``model_list`` of ``MLCChat/mlc-package-config.json`` into a binary model library. +2. **Build runtime and tokenizer.** In addition to the model itself, a lightweight runtime and tokenizer are required to actually run the LLM. -Create Android Project using Compiled Models --------------------------------------------- +The command creates a ``./dist/`` directory that contains the runtime and model build output. +Please make sure all the following files exist in ``./dist/``. -The source code for MLC LLM is available under ``android/``, including scripts to build dependencies. Enter the directory first: +.. code:: -.. code-block:: bash + dist + └── lib + └── mlc4j + ├── build.gradle + ├── output + │ ├── arm64-v8a + │ │ └── libtvm4j_runtime_packed.so + │ └── tvm4j_core.jar + └── src + ├── cpp + │ └── tvm_runtime.h + └── main + ├── AndroidManifest.xml + ├── assets + │ └── mlc-app-config.json + └── java + └── ai + └── mlc + └── mlcllm + └── ChatModule.java - cd ./android/library +The model execution logic in mobile GPUs is incorporated into ``libtvm4j_runtime_packed.so``, +while ``tvm4j_core.jar`` is a lightweight (~60 kb) `Java binding `_ to it. 
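+The MLCChat app consumes this output through Gradle: ``MLCChat/settings.gradle`` (updated in this change) points the ``:mlc4j`` project at ``dist/lib/mlc4j``, so no extra copying is needed: + +.. code:: groovy + + include ':mlc4j' + project(':mlc4j').projectDir = file('dist/lib/mlc4j') + 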
-**Build necessary dependencies.** Configure the list of models the app comes with using the JSON file ``app-config.json`` which contains two properties `model_list` and `model_lib_path_for_prepare_libs` ``model_lib_path_for_prepare_libs`` contains list of model library paths under `./dist/` that will be bundled with the apk. The ``model_list`` property contains data for models that are not bundled with the apk, but downloaded from the internet at run-time. Each model defined in `model_list` contain the following fields: -``model_url`` - (Required) URL to the repo containing the weights. - -``model_id`` - (Required) Unique local identifier to identify the model. - -``model_lib`` - (Required) Matches the system-lib-prefix, generally set during ``mlc_llm compile`` which can be specified using - ``--system-lib-prefix`` argument. By default, it is set to ``"${model_type}_${quantization}"`` e.g. ``gpt_neox_q4f16_1`` for the RedPajama-INCITE-Chat-3B-v1 model. If the ``--system-lib-prefix`` argument is manually specified during ``mlc_llm compile``, the ``model_lib`` field should be updated accordingly. - -``estimated_vram_bytes`` - (Optional) Estimated requirements of VRAM to run the model. - -To change the configuration, edit ``app-config.json``: +.. note:: -.. code-block:: bash + We leverage a local JIT cache to avoid repetitive compilation of the same input. + However, sometimes it is helpful to force rebuild when we have a new compiler update + or when something goes wrong with the cached library. + You can do so by setting the environment variable ``MLC_JIT_POLICY=REDO`` - vim ./src/main/assets/app-config.json + .. code:: bash -Then bundle the android library ``${MODEL_NAME}-${QUANTIZATION}-android.tar`` compiled from ``mlc_llm compile`` in the previous steps, with TVM Unity's Java runtime by running the commands below: + MLC_JIT_POLICY=REDO mlc_llm package -.. code-block:: bash - ./prepare_libs.sh +Step 3. Build Android App +^^^^^^^^^^^^^^^^^^^^^^^^^ -which generates the two files below: +Open folder ``./android/MLCChat`` as an Android Studio Project. +Connect your Android device to your machine. +In the menu bar of Android Studio, click **"Build → Make Project"**. +Once the build is finished, click **"Run → Run 'app'"** and you will see the app launched on your phone. -.. code-block:: bash +.. note:: + ❗ This app cannot be run in an emulator and thus a physical phone is required, because MLC LLM needs an actual mobile GPU to meaningfully run at an accelerated speed. - >>> find ./build/output -type f - ./build/output/arm64-v8a/libtvm4j_runtime_packed.so - ./build/output/tvm4j_core.jar -The model execution logic in mobile GPUs is incorporated into ``libtvm4j_runtime_packed.so``, while ``tvm4j_core.jar`` is a lightweight (~60 kb) `Java binding `_ to it. +Customize the App +----------------- -**Build the Android app**. Open folder ``./android`` as an Android Studio Project. Connect your Android device to your machine. In the menu bar of Android Studio, click "Build → Make Project". Once the build is finished, click "Run → Run 'app'" and you will see the app launched on your phone. +We can customize the models built in the Android app by customizing `MLCChat/mlc-package-config.json `_. +We introduce each field of the JSON file here. -.. note:: - ❗ This app cannot be run in an emulator and thus a physical phone is required, because MLC LLM needs an actual mobile GPU to meaningfully run at an accelerated speed. 
+Each entry in ``"model_list"`` of the JSON file has the following fields: -Incorporate Model Weights ------------------------- +``model`` + (Required) The path to the MLC-converted model to be built into the app. + It is a Hugging Face URL (e.g., ``"model": "HF://mlc-ai/phi-2-q4f16_1-MLC"``) that contains + the pre-converted model weights. -Instructions have been provided to build an Android App with MLC LLM in previous sections, but it requires run-time weight downloading from HuggingFace, as configured in `app-config.json` in previous steps under `model_url`. However, it could be desirable to bundle weights together into the app to avoid downloading over the network. In this section, we provide a simple ADB-based walkthrough that hopefully helps with further development. +``model_id`` + (Required) A unique local identifier to identify the model. + It can be an arbitrary string. -**Generating APK**. Enter Android Studio, and click "Build → Generate Signed Bundle/APK" to build an APK for release. If it is the first time you generate an APK, you will need to create a key according to `the official guide from Android `_. This APK will be placed under ``android/app/release/app-release.apk``. +``estimated_vram_bytes`` + (Required) Estimated requirements of vRAM to run the model. + +``bundle_weight`` + (Optional) A boolean flag indicating whether to bundle model weights into the app. See :ref:`android-bundle-model-weights` below. + +``overrides`` + (Optional) A dictionary to override the default model context window size (to limit the KV cache size) and prefill chunk size (to limit the model temporary execution memory). + Example: + + .. code:: json + + { + "device": "android", + "model_list": [ + { + "model": "HF://mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", + "model_id": "RedPajama-INCITE-Chat-3B-v1-q4f16_1", + "estimated_vram_bytes": 1948348579, + "overrides": { + "context_window_size": 512, + "prefill_chunk_size": 128 + } + } + ] + } -**Install ADB and USB debugging**. Enable "USB debugging" in the developer mode in your phone settings. In SDK manager, install `Android SDK Platform-Tools `_. Add the path to platform-tool path to the environment variable ``PATH``. Run the following commands, and if ADB is installed correctly, your phone will appear as a device: +``model_lib`` + (Optional) A string specifying the system library prefix to use for the model. + Usually this is used when you want to build multiple model variants with the same architecture into the app. + **This field does not affect any app functionality.** + The ``"model_lib_path_for_prepare_libs"`` introduced below is also related. + Example: + + .. code:: json + + { + "device": "android", + "model_list": [ + { + "model": "HF://mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", + "model_id": "RedPajama-INCITE-Chat-3B-v1-q4f16_1", + "estimated_vram_bytes": 1948348579, + "model_lib": "gpt_neox_q4f16_1" + } + ] + } + + +Besides ``model_list`` in ``MLCChat/mlc-package-config.json``, +you can also **optionally** specify a dictionary of ``"model_lib_path_for_prepare_libs"``, +**if you want to use model libraries that are manually compiled**. +The keys of this dictionary should be the ``model_lib`` strings specified in the model list, +and the values of this dictionary are the paths (absolute or relative) to the manually compiled model libraries. +The model libraries specified in ``"model_lib_path_for_prepare_libs"`` will be built into the app when running ``mlc_llm package``. +Example: + +.. 
code:: json + + { + "device": "android", + "model_list": [ + { + "model": "HF://mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", + "model_id": "RedPajama-INCITE-Chat-3B-v1-q4f16_1", + "estimated_vram_bytes": 1948348579, + "model_lib": "gpt_neox_q4f16_1" + } + ], + "model_lib_path_for_prepare_libs": { + "gpt_neox_q4f16_1": "../../dist/lib/RedPajama-INCITE-Chat-3B-v1-q4f16_1-android.tar" + } + } + +.. _android-bundle-model-weights: + +Bundle Model Weights +-------------------- + +Instructions have been provided to build an Android App with MLC LLM in previous sections, +but it requires run-time weight downloading from HuggingFace, +as configured in ``MLCChat/mlc-package-config.json``. +However, it could be desirable to bundle weights together into the app to avoid downloading over the network. +In this section, we provide a simple ADB-based walkthrough that hopefully helps with further development. + +**Enable weight bundle**. +Set the field ``"bundle_weight": true`` for any model you want to bundle weights +in ``MLCChat/mlc-package-config.json``, and run ``mlc_llm package`` again. +Below is an example: + +.. code:: json + + { + "device": "android", + "model_list": [ + { + "model": "HF://mlc-ai/gemma-2b-it-q4f16_1-MLC", + "model_id": "gemma-2b-q4f16_1", + "estimated_vram_bytes": 3000000000, + "bundle_weight": true + } + ] + } + +The outcome of running ``mlc_llm package`` should be as follows: + +.. code:: + + dist + ├── bundle + │ ├── gemma-2b-q4f16_1 # The model weights that will be bundled into the app. + │ └── mlc-app-config.json + └── ... + + +**Generating APK**. Enter Android Studio, and click **"Build → Generate Signed Bundle/APK"** to build an APK for release. If it is the first time you generate an APK, you will need to create a key according to `the official guide from Android `_. +This APK will be placed under ``android/MLCChat/app/release/app-release.apk``. + +**Install ADB and USB debugging**. Enable "USB debugging" in the developer mode in your phone settings. +In "SDK manager - SDK Tools", install `Android SDK Platform-Tools `_. +Add the path to platform-tool path to the environment variable ``PATH`` (on macOS, it is ``$HOME/Library/Android/sdk/platform-tools``). +Run the following commands, and if ADB is installed correctly, your phone will appear as a device: .. code-block:: bash adb devices -**Install the APK and weights to your phone**. Run the commands below replacing ``${MODEL_NAME}`` and ``${QUANTIZATION}`` with the actual model name (e.g. Llama-2-7b-chat-hf) and quantization format (e.g. q4f16_1). +**Install the APK and weights to your phone**. +Run the commands below to install the app, and push the local weights to the app data directory on your device. +Once it finishes, you can start the MLCChat app on your device. +The models with ``bundle_weight`` set to true will have their weights already on device. .. code-block:: bash - adb install android/app/release/app-release.apk - adb push dist/${MODEL_NAME}-${QUANTIZATION}-MLC /data/local/tmp/${MODEL_NAME}-${QUANTIZATION}/ - adb shell "mkdir -p /storage/emulated/0/Android/data/ai.mlc.mlcchat/files/" - adb shell "mv /data/local/tmp/${MODEL_NAME}-${QUANTIZATION} /storage/emulated/0/Android/data/ai.mlc.mlcchat/files/" + cd /path/to/MLCChat # e.g., "android/MLCChat" + python bundle_weight.py --apk-path app/release/app-release.apk diff --git a/docs/deploy/ios.rst b/docs/deploy/ios.rst index b90c48a84d..02aaa55952 100644 --- a/docs/deploy/ios.rst +++ b/docs/deploy/ios.rst @@ -1,7 +1,7 @@ .. 
_deploy-ios: -iOS App and Swift API -===================== +iOS and Swift SDK +================= .. contents:: Table of Contents :local: @@ -53,41 +53,44 @@ Step 2. Build Runtime and Model Libraries ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The models to be built for the iOS app are specified in ``MLCChat/mlc-package-config.json``: -in the ``model_list`` field of this file, ``model`` points to the Hugging Face model repository, -where model weights are downloaded from. ``model_id`` is a unique model identifier. -``estimated_vram_bytes`` is an estimation of the vRAM the model takes at runtime. +in the ``model_list``, + +* ``model`` points to the Hugging Face repository which contains the pre-converted model weights. The iOS app will download model weights from the Hugging Face URL. +* ``model_id`` is a unique model identifier. +* ``estimated_vram_bytes`` is an estimation of the vRAM the model takes at runtime. +* ``"bundle_weight": true`` means the model weights of the model will be bundled into the app when building. +* ``overrides`` specifies some model config parameter overrides. + We have a one-line command to build and prepare all the model libraries: .. code:: bash - cd /path/to/MLCChat - ./prepare_package.sh + cd /path/to/MLCChat # e.g., "ios/MLCChat" + export MLC_LLM_HOME=/path/to/mlc-llm # e.g., "../.." + mlc_llm package This command mainly executes the following two steps: -1. **Build runtime and tokenizer.** In addition to the model itself, a lightweight runtime and tokenizer are required to actually run the LLM. -2. **Compile models.** We compile each model in ``model_list`` of ``MLCChat/mlc-package-config.json`` into a binary model library. +1. **Compile models.** We compile each model in ``model_list`` of ``MLCChat/mlc-package-config.json`` into a binary model library. +2. **Build runtime and tokenizer.** In addition to the model itself, a lightweight runtime and tokenizer are required to actually run the LLM. The command creates a ``./dist/`` directory that contains the runtime and model build output. -Please make sure all the following files exist in ``./dist/``. - -.. code:: bash - - >>> ls ./dist - bundle # The directory for mlc-app-config.json (and optionally model weights) - # that will be bundled into the iOS app. - lib # The directory for runtime and model libraries. +Please make sure ``dist/`` follows the structure below, except the optional model weights. - >>> ls ./dist/bundle - mlc-app-config.json # The app config JSON file. +.. code:: - >>> ls ./dist/lib - libmlc_llm.a # A lightweight interface to interact with LLM, tokenizer, and TVM Unity runtime - libmodel_iphone.a # The compiled model lib - libsentencepiece.a # SentencePiece tokenizer - libtokenizers_cpp.a # Huggingface tokenizer - libtvm_runtime.a # TVM Unity runtime + dist + ├── bundle # The directory for mlc-app-config.json (and optionally model weights) + │ │ # that will be bundled into the iOS app. + │ ├── mlc-app-config.json # The app config JSON file. + │ └── [optional model weights] + └── lib + ├── libmlc_llm.a # A lightweight interface to interact with LLM, tokenizer, and TVM Unity runtime. + ├── libmodel_iphone.a # The compiled model lib. + ├── libsentencepiece.a # SentencePiece tokenizer + ├── libtokenizers_cpp.a # Huggingface tokenizer. + └── libtvm_runtime.a # TVM Unity runtime. .. note:: @@ -99,7 +102,7 @@ Please make sure all the following files exist in ``./dist/``. .. code:: bash - MLC_JIT_POLICY=REDO ./prepare_package.sh + MLC_JIT_POLICY=REDO mlc_llm package .. 
_ios-bundle-model-weights: @@ -109,12 +112,13 @@ Step 3. (Optional) Bundle model weights into the app By default, we download the model weights from Hugging Face when running the app. **As an option,**, we bundle model weights into the app: set the field ``"bundle_weight": true`` for any model you want to bundle weights -in ``MLCChat/mlc-package-config.json``, and run ``prepare_package.sh`` again. +in ``MLCChat/mlc-package-config.json``, and run ``mlc_llm package`` again. Below is an example: .. code:: json { + "device": "iphone", "model_list": [ { "model": "HF://mlc-ai/gemma-2b-it-q4f16_1-MLC", @@ -128,13 +132,15 @@ Below is an example: ] } -The outcome of running ``prepare_package.sh`` should be as follows: +The outcome of running ``mlc_llm package`` should be as follows: -.. code:: bash +.. code:: - >>> ls ./dist/bundle - mlc-app-config.json - gemma-2b-it-q4f16_1-MLC # The model weights that will be bundled into the app. + dist + ├── bundle + │ ├── gemma-2b-q4f16_1 # The model weights that will be bundled into the app. + │ └── mlc-app-config.json + └── ... .. _ios-build-app: @@ -190,6 +196,7 @@ Each entry in ``"model_list"`` of the JSON file has the following fields: .. code:: json { + "device": "iphone", "model_list": [ { "model": "HF://mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", @@ -213,6 +220,7 @@ Each entry in ``"model_list"`` of the JSON file has the following fields: .. code:: json { + "device": "iphone", "model_list": [ { "model": "HF://mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", @@ -229,12 +237,13 @@ you can also **optionally** specify a dictionary of ``"model_lib_path_for_prepar **if you want to use model libraries that are manually compiled**. The keys of this dictionary should be the ``model_lib`` that specified in model list, and the values of this dictionary are the paths (absolute, or relative) to the manually compiled model libraries. -The model libraries specified in ``"model_lib_path_for_prepare_libs"`` will be built into the app when running ``prepare_package.sh``. +The model libraries specified in ``"model_lib_path_for_prepare_libs"`` will be built into the app when running ``mlc_llm package``. Example: .. code:: json { + "device": "iphone", "model_list": [ { "model": "HF://mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", @@ -326,6 +335,7 @@ Finally, we add the model into the ``model_list`` of .. code:: json { + "device": "iphone", "model_list": [ { "model": "HF://mlc-ai/NeuralHermes-2.5-Mistral-7B-q3f16_1-MLC", @@ -346,9 +356,9 @@ Build Apps with MLC Swift API We also provide a Swift package that you can use to build your own app. The package is located under ``ios/MLCSwift``. -- First, create `mlc-package-config.json` and `prepare_package.sh` in your project folder. +- First, create ``mlc-package-config.json`` in your project folder. You do so by copying the files in MLCChat folder. - Run `prepare_package.sh` + Run ``mlc_llm package``. This will give us the necessary libraries under ``/path/to/project/dist``. - Under "Build phases", add ``/path/to/project/dist/bundle`` this will copying this folder into your app to include bundled weights and configs. diff --git a/docs/index.rst b/docs/index.rst index 2d5597d18e..f406908219 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -45,6 +45,7 @@ Check out :ref:`introduction-to-mlc-llm` for the introduction and tutorial of a compilation/convert_weights.rst compilation/compile_models.rst + compilation/package_model_libraries_weights.rst compilation/define_new_models.rst .. 
toctree:: diff --git a/ios/.gitignore b/ios/.gitignore index 31d064cacb..f75e36783f 100644 --- a/ios/.gitignore +++ b/ios/.gitignore @@ -1,2 +1,3 @@ xuserdata +MLCSwift/tvm_home *~ diff --git a/ios/MLCChat/README.md b/ios/MLCChat/README.md index 831d7eee73..f4f4820e24 100644 --- a/ios/MLCChat/README.md +++ b/ios/MLCChat/README.md @@ -2,5 +2,5 @@ Checkout [Documentation page](https://llm.mlc.ai/docs/deploy/ios.html) for more information. -- run `./prepare_package.sh` -- open the xcode project +- run `mlc_llm package` +- open the Xcode project diff --git a/ios/MLCChat/mlc-package-config.json b/ios/MLCChat/mlc-package-config.json index 66ca1379f7..094e6e0ddb 100644 --- a/ios/MLCChat/mlc-package-config.json +++ b/ios/MLCChat/mlc-package-config.json @@ -1,4 +1,5 @@ { + "device": "iphone", "model_list": [ { "model": "HF://mlc-ai/Mistral-7B-Instruct-v0.2-q3f16_1-MLC", diff --git a/ios/MLCChat/prepare_package.sh b/ios/MLCChat/prepare_package.sh deleted file mode 100755 index 6dedca46ae..0000000000 --- a/ios/MLCChat/prepare_package.sh +++ /dev/null @@ -1,10 +0,0 @@ -# This script does two things -# It calls prepare_libs.sh in $MLC_LLM_HOME/ios/ to setup the iOS package and build binaries -# It then calls mlc_llm package to setup the weight and library bundle -# Feel free to copy this file and mlc-package-config.json to your project - -MLC_LLM_HOME="${MLC_LLM_HOME:-../..}" -cd ${MLC_LLM_HOME}/ios && ./prepare_libs.sh $@ && cd - -mkdir -p dist/lib -cp ${MLC_LLM_HOME}/ios/build/lib/* dist/lib/ -python -m mlc_llm package mlc-package-config.json --device iphone -o dist diff --git a/ios/MLCEngineExample/MLCEngineExample/MLCEngineExampleApp.swift b/ios/MLCEngineExample/MLCEngineExample/MLCEngineExampleApp.swift index cf4d3dae53..26361977ce 100644 --- a/ios/MLCEngineExample/MLCEngineExample/MLCEngineExampleApp.swift +++ b/ios/MLCEngineExample/MLCEngineExample/MLCEngineExampleApp.swift @@ -5,7 +5,8 @@ // example and quick testing purposes. // // To build this app, select target My Mac(Designed for iPad) and run -// Make sure you run prepare_package.sh first with "MLCChat" replaced by "MLCEngineExample" +// Make sure you run "mlc_llm package" first with "MLCChat" +// replaced by "MLCEngineExample" // to ensure the "dist/bundle" folder populates with the right model file // and we have the model lib packaged correctly import Foundation @@ -22,9 +23,9 @@ class AppState: ObservableObject { private let bundleURL = Bundle.main.bundleURL.appending(path: "bundle") // model path, this must match a builtin // file name in prepare_params.sh - private let modelPath = "Llama-3-8B-Instruct-q3f16_1-MLC" + private let modelPath = "llama3" // model lib identifier of within the packaged library - // make sure we run prepare_package.sh + // make sure we run "mlc_llm package" private let modelLib = "llama_q3f16_1" // this is a message to be displayed in app diff --git a/ios/MLCEngineExample/README.md b/ios/MLCEngineExample/README.md index 67bf06089b..2e930e497b 100644 --- a/ios/MLCEngineExample/README.md +++ b/ios/MLCEngineExample/README.md @@ -8,5 +8,5 @@ things may not yet be fully functioning and are subject to change Checkout [Documentation page](https://llm.mlc.ai/docs/deploy/ios.html) for more information. 
-- run `./prepare_package.sh` -- open the xcode project +- run `mlc_llm package` +- open the Xcode project diff --git a/ios/MLCEngineExample/mlc-package-config.json b/ios/MLCEngineExample/mlc-package-config.json index 066fe7fa10..6a3bcaaa5a 100644 --- a/ios/MLCEngineExample/mlc-package-config.json +++ b/ios/MLCEngineExample/mlc-package-config.json @@ -1,4 +1,5 @@ { + "device": "iphone", "model_list": [ { "model": "HF://mlc-ai/Llama-3-8B-Instruct-q3f16_1-MLC", diff --git a/ios/MLCEngineExample/prepare_package.sh b/ios/MLCEngineExample/prepare_package.sh deleted file mode 100755 index d1f022166d..0000000000 --- a/ios/MLCEngineExample/prepare_package.sh +++ /dev/null @@ -1,10 +0,0 @@ -# This script does two things -# It calls prepare_libs.sh in $MLC_LLM_HOME/ios/ to setup the iOS package and build binaries -# It then calls mlc_llm package to setup the weight and library bundle -# Feel free to copy this file and mlc-package-config.json to your project - -MLC_LLM_HOME="${MLC_LLM_HOME:-../..}" -cd ${MLC_LLM_HOME}/ios && ./prepare_libs.sh $@ && cd - -rm -rf dist/lib && mkdir -p dist/lib -cp ${MLC_LLM_HOME}/ios/build/lib/* dist/lib/ -python -m mlc_llm package mlc-package-config.json --device iphone -o dist diff --git a/ios/MLCSwift/tvm_home b/ios/MLCSwift/tvm_home deleted file mode 120000 index e15bf649f5..0000000000 --- a/ios/MLCSwift/tvm_home +++ /dev/null @@ -1 +0,0 @@ -../../3rdparty/tvm \ No newline at end of file diff --git a/ios/README.md b/ios/README.md index de94ee75a0..39f0e0b4b6 100644 --- a/ios/README.md +++ b/ios/README.md @@ -1,3 +1,3 @@ -# MLC-LLM IOS +# MLC-LLM iOS [Documentation page](https://llm.mlc.ai/docs/deploy/ios.html) diff --git a/ios/prepare_libs.sh b/ios/prepare_libs.sh index 58e6468637..ede58c32e0 100755 --- a/ios/prepare_libs.sh +++ b/ios/prepare_libs.sh @@ -1,5 +1,5 @@ # Command to prepare the mlc llm static libraries -# This command will be invoked by prepare_package.sh in the subfolder +# This command will be invoked by the "mlc_llm package" command function help { echo -e "OPTION:" echo -e " -s, --simulator Build for Simulator" @@ -7,6 +7,7 @@ function help { echo -e " -h, --help Prints this help\n" } +MLC_LLM_HOME="${MLC_LLM_HOME:-..}" is_simulator="false" arch="arm64" @@ -53,7 +54,7 @@ fi mkdir -p build/ && cd build/ -cmake ../..\ +cmake $MLC_LLM_HOME\ -DCMAKE_BUILD_TYPE=$type\ -DCMAKE_SYSTEM_NAME=iOS\ -DCMAKE_SYSTEM_VERSION=14.0\ @@ -71,5 +72,5 @@ cmake --build . --config release --target mlc_llm_static -j cmake --build . --target install --config release -j cd .. 
-rm -rf MLCSwift/tvm_home -ln -s ../../3rdparty/tvm MLCSwift/tvm_home +rm -rf $MLC_LLM_HOME/ios/MLCSwift/tvm_home +ln -s $MLC_LLM_HOME/3rdparty/tvm $MLC_LLM_HOME/ios/MLCSwift/tvm_home diff --git a/python/mlc_llm/cli/package.py b/python/mlc_llm/cli/package.py index f605858d67..b8c6b994c2 100644 --- a/python/mlc_llm/cli/package.py +++ b/python/mlc_llm/cli/package.py @@ -1,5 +1,6 @@ """Command line entrypoint of package.""" +import os from pathlib import Path from typing import Union @@ -22,6 +23,10 @@ def _parse_package_config(path: Union[str, Path]) -> Path: raise ValueError(f"Path {str(path)} is expected to be a JSON file.") return path + def _parse_mlc_llm_home(path: str) -> Path: + os.environ["MLC_LLM_HOME"] = path + return Path(path) + def _parse_output(path: Union[str, Path]) -> Path: path = Path(path) if not path.is_dir(): @@ -29,27 +34,34 @@ def _parse_output(path: Union[str, Path]) -> Path: return path parser.add_argument( - "package_config", + "--package-config", type=_parse_package_config, - help=HELP["config_package"] + " (required)", + default="mlc-package-config.json", + help=HELP["config_package"] + ' (default: "%(default)s")', ) parser.add_argument( - "--device", - type=str, - choices=["iphone", "android"], - required=True, - help=HELP["device_package"] + " (required)", + "--mlc-llm-home", + type=_parse_mlc_llm_home, + default=os.environ.get("MLC_LLM_HOME", None), + help=HELP["mlc_llm_home"] + " (default: the $MLC_LLM_HOME environment variable)", ) parser.add_argument( "--output", "-o", type=_parse_output, - required=True, - help=HELP["output_package"] + " (required)", + default="dist", + help=HELP["output_package"] + ' (default: "%(default)s")', ) parsed = parser.parse_args(argv) + if parsed.mlc_llm_home is None: + raise ValueError( + "MLC LLM home is not specified. " + "Please obtain a copy of MLC LLM source code by " + "cloning https://github.com/mlc-ai/mlc-llm, and set environment variable " + '"MLC_LLM_HOME=path/to/mlc-llm"' + ) package( package_config_path=parsed.package_config, - device=parsed.device, + mlc_llm_home=parsed.mlc_llm_home, output=parsed.output, ) diff --git a/python/mlc_llm/help.py b/python/mlc_llm/help.py index a9b8917990..50e5a3a69a 100644 --- a/python/mlc_llm/help.py +++ b/python/mlc_llm/help.py @@ -189,40 +189,39 @@ "--additional-models model_path_1:model_lib_1 model_path_2 ...". When the model lib of a model is not given, JIT model compilation will be activated to compile the model automatically. -""", +""".strip(), "gpu_memory_utilization_serve": """ A number in (0, 1) denoting the fraction of GPU memory used by the server in total. It is used to infer to maximum possible KV cache capacity. When it is unspecified, it defaults to 0.85. Under mode "local" or "interactive", the actual memory usage may be significantly smaller than this number. Under mode "server", the actual memory usage may be slightly larger than this number. -""", +""".strip(), "speculative_mode_serve": """ The speculative decoding mode. Right now three options are supported: - "disable", where speculative decoding is not enabled, - "small_draft", denoting the normal speculative decoding (small draft) style, - "eagle", denoting the eagle-style speculative decoding. The default mode is "disable". -""", +""".strip(), "spec_draft_length_serve": """ The number of draft tokens to generate in speculative proposal. The default values is 4. -""", +""".strip(), "engine_config_serve": """ The MLCEngine execution configuration. 
Currently speculative decoding mode is specified via engine config. For example, you can use "--engine-config='spec_draft_length=4;speculative_mode=eagle'" to specify the eagle-style speculative decoding. Check out class `EngineConfig` in mlc_llm/serve/config.py for detailed specification. -""", +""".strip(), "config_package": """ The path to "mlc-package-config.json" which is used for package build. -See "ios/MLCChat/mlc-package-config.json" as an example. -""", - "device_package": """ -The device to build package for. -Options are ["iphone", "android"]. -""", +See "https://github.com/mlc-ai/mlc-llm/blob/main/ios/MLCChat/mlc-package-config.json" as an example. +""".strip(), + "mlc_llm_home": """ +The source code path to MLC LLM. +""".strip(), "output_package": """ The path of output directory for the package build outputs. -""", +""".strip(), } diff --git a/python/mlc_llm/interface/package.py b/python/mlc_llm/interface/package.py index d342ff589d..58ff119cc0 100644 --- a/python/mlc_llm/interface/package.py +++ b/python/mlc_llm/interface/package.py @@ -4,12 +4,11 @@ import json import os import shutil +import subprocess import sys from dataclasses import asdict from pathlib import Path -from typing import List, Literal - -from tvm.contrib import cc +from typing import Any, Dict, List, Literal from mlc_llm.chat_module import ChatConfig, _get_chat_config, _get_model_path from mlc_llm.interface import jit @@ -18,125 +17,14 @@ logging.enable_logging() logger = logging.getLogger(__name__) +SUPPORTED_DEVICES = ["iphone", "android"] -def _get_model_libs(lib_path: Path) -> List[str]: - """Get the model lib prefixes in the given static lib path.""" - global_symbol_map = cc.get_global_symbol_section_map(lib_path) - libs = [] - suffix = "___tvm_dev_mblob" - for name, _ in global_symbol_map.items(): - if name.endswith(suffix): - model_lib = name[: -len(suffix)] - if model_lib.startswith("_"): - model_lib = model_lib[1:] - libs.append(model_lib) - return libs - - -def validate_model_lib( # pylint: disable=too-many-locals - app_config_path: Path, - package_config_path: Path, - model_lib_path_for_prepare_libs: dict, - device: Literal["iphone", "android"], - output: Path, -) -> None: - """Validate the model lib prefixes of model libraries.""" - # pylint: disable=import-outside-toplevel,redefined-outer-name,shadowed-import,reimported - if device == "android": - from tvm.contrib import ndk as cc - else: - from tvm.contrib import cc - # pylint: enable=import-outside-toplevel,redefined-outer-name,shadowed-import,reimported - - with open(app_config_path, "r", encoding="utf-8") as file: - app_config = json.load(file) - - tar_list = [] - model_set = set() - - for model, model_lib_path in model_lib_path_for_prepare_libs.items(): - model_lib_path = os.path.join(model_lib_path) - lib_path_valid = os.path.isfile(model_lib_path) - if not lib_path_valid: - raise RuntimeError(f"Cannot find file {model_lib_path} as an {device} model library") - tar_list.append(model_lib_path) - model_set.add(model) - - os.makedirs(output / "lib", exist_ok=True) - lib_path = ( - output / "lib" / ("libmodel_iphone.a" if device == "iphone" else "libmodel_android.a") - ) - - cc.create_staticlib(lib_path, tar_list) - available_model_libs = _get_model_libs(lib_path) - logger.info("Creating lib from %s", str(tar_list)) - logger.info("Validating the library %s", str(lib_path)) - logger.info( - "List of available model libs packaged: %s," - " if we have '-' in the model_lib string, it will be turned into '_'", - str(available_model_libs), - ) - 
global_symbol_map = cc.get_global_symbol_section_map(lib_path) - error_happened = False - - for item in app_config["model_list"]: - model_lib = item["model_lib"] - model_id = item["model_id"] - if model_lib not in model_set: - # NOTE: this cannot happen under new setting - # since if model_lib is not included, it will be jitted - raise RuntimeError( - f"ValidationError: model_lib={model_lib} specified for model_id={model_id} " - "is not included in model_lib_path_for_prepare_libs argument, " - "This will cause the specific model not being able to load, " - f"model_lib_path_for_prepare_libs={model_lib_path_for_prepare_libs}" - ) - - model_prefix_pattern = model_lib.replace("-", "_") + "___tvm_dev_mblob" - if ( - model_prefix_pattern not in global_symbol_map - and "_" + model_prefix_pattern not in global_symbol_map - ): - # NOTE: no lazy format is ok since this is a slow pass - model_lib_path = model_lib_path_for_prepare_libs[model_lib] - log_msg = ( - "ValidationError:\n" - f"\tmodel_lib {model_lib} requested in {str(app_config_path)}" - f" is not found in {str(lib_path)}\n" - f"\tspecifically the model_lib for {model_lib_path}.\n" - f"\tcurrent available model_libs in {str(lib_path)}: {available_model_libs}\n" - f"\tThis can happen when we manually specified model_lib_path_for_prepare_libs" - f" in {str(package_config_path)}\n" - f"\tConsider remove model_lib_path_for_prepare_libs (so library can be jitted)" - "or check the compile command" - ) - logger.info(log_msg) - error_happened = True - - if not error_happened: - logger.info(style.green("Validation pass")) - else: - logger.info(style.red("Validation failed")) - sys.exit(255) - - -def package( # pylint: disable=too-many-locals,too-many-statements,too-many-branches - package_config_path: Path, - device: Literal["iphone", "android"], - output: Path, -) -> None: - """Python entrypoint of package.""" - # - Read package config. - with open(package_config_path, "r", encoding="utf-8") as file: - package_config = json.load(file) - if not isinstance(package_config, dict): - raise ValueError( - "The content of MLC package config is expected to be a dict with " - f'field "model_list". However, the content of "{package_config_path}" is not a dict.' - ) +def build_model_library( # pylint: disable=too-many-branches,too-many-locals,too-many-statements + package_config: Dict[str, Any], device: str, bundle_dir: Path, app_config_path: Path +) -> Dict[str, str]: + """Build model libraries. Return the dictionary of "library prefix to lib path".""" # - Create the bundle directory. - bundle_dir = output / "bundle" os.makedirs(bundle_dir, exist_ok=True) # Clean up all the directories in `output/bundle`. logger.info('Clean up all directories under "%s"', str(bundle_dir)) @@ -242,7 +130,7 @@ def package( # pylint: disable=too-many-locals,too-many-statements,too-many-bra ) ) # Overwrite the model weight directory in bundle. 
- bundle_model_weight_path = bundle_dir / model_path.name + bundle_model_weight_path = bundle_dir / model_id logger.info( "Bundle weight for %s, copy into %s", style.bold(model_id), @@ -251,7 +139,8 @@ def package( # pylint: disable=too-many-locals,too-many-statements,too-many-bra if bundle_model_weight_path.exists(): shutil.rmtree(bundle_model_weight_path) shutil.copytree(model_path, bundle_model_weight_path) - app_config_model_entry["model_path"] = model_path.name + if bundle_weight and device == "iphone": + app_config_model_entry["model_path"] = model_id else: app_config_model_entry["model_url"] = model.replace("HF://", "https://huggingface.co/") @@ -265,15 +154,217 @@ def package( # pylint: disable=too-many-locals,too-many-statements,too-many-bra {"model_list": app_config_model_list}, indent=2, ) - app_config_path = bundle_dir / "mlc-app-config.json" with open(app_config_path, "w", encoding="utf-8") as file: print(app_config_json_str, file=file) logger.info( - 'Dump the app config below to "dist/bundle/mlc-app-config.json":\n%s', + 'Dump the app config below to "%s":\n%s', + str(app_config_path), style.green(app_config_json_str), ) + return model_lib_path_for_prepare_libs + + +def validate_model_lib( # pylint: disable=too-many-locals + app_config_path: Path, + package_config_path: Path, + model_lib_path_for_prepare_libs: dict, + device: Literal["iphone", "android"], + output: Path, +) -> None: + """Validate the model lib prefixes of model libraries.""" + # pylint: disable=import-outside-toplevel,redefined-outer-name,shadowed-import,reimported + if device == "android": + from tvm.contrib import ndk as cc + else: + from tvm.contrib import cc + # pylint: enable=import-outside-toplevel,redefined-outer-name,shadowed-import,reimported + + with open(app_config_path, "r", encoding="utf-8") as file: + app_config = json.load(file) + + tar_list = [] + model_set = set() + + for model, model_lib_path in model_lib_path_for_prepare_libs.items(): + model_lib_path = os.path.join(model_lib_path) + lib_path_valid = os.path.isfile(model_lib_path) + if not lib_path_valid: + raise RuntimeError(f"Cannot find file {model_lib_path} as an {device} model library") + tar_list.append(model_lib_path) + model_set.add(model) + + os.makedirs(output / "lib", exist_ok=True) + lib_path = ( + output / "lib" / ("libmodel_iphone.a" if device == "iphone" else "libmodel_android.a") + ) + + def _get_model_libs(lib_path: Path) -> List[str]: + """Get the model lib prefixes in the given static lib path.""" + global_symbol_map = cc.get_global_symbol_section_map(lib_path) + libs = [] + suffix = "___tvm_dev_mblob" + for name, _ in global_symbol_map.items(): + if name.endswith(suffix): + model_lib = name[: -len(suffix)] + if model_lib.startswith("_"): + model_lib = model_lib[1:] + libs.append(model_lib) + return libs + + cc.create_staticlib(lib_path, tar_list) + available_model_libs = _get_model_libs(lib_path) + logger.info("Creating lib from %s", str(tar_list)) + logger.info("Validating the library %s", str(lib_path)) + logger.info( + "List of available model libs packaged: %s," + " if we have '-' in the model_lib string, it will be turned into '_'", + str(available_model_libs), + ) + global_symbol_map = cc.get_global_symbol_section_map(lib_path) + error_happened = False + + for item in app_config["model_list"]: + model_lib = item["model_lib"] + model_id = item["model_id"] + if model_lib not in model_set: + # NOTE: this cannot happen under new setting + # since if model_lib is not included, it will be jitted + raise RuntimeError( + 
f"ValidationError: model_lib={model_lib} specified for model_id={model_id} " + "is not included in model_lib_path_for_prepare_libs argument, " + "This will cause the specific model not being able to load, " + f"model_lib_path_for_prepare_libs={model_lib_path_for_prepare_libs}" + ) + + model_prefix_pattern = model_lib.replace("-", "_") + "___tvm_dev_mblob" + if ( + model_prefix_pattern not in global_symbol_map + and "_" + model_prefix_pattern not in global_symbol_map + ): + # NOTE: no lazy format is ok since this is a slow pass + model_lib_path = model_lib_path_for_prepare_libs[model_lib] + log_msg = ( + "ValidationError:\n" + f"\tmodel_lib {model_lib} requested in {str(app_config_path)}" + f" is not found in {str(lib_path)}\n" + f"\tspecifically the model_lib for {model_lib_path}.\n" + f"\tcurrent available model_libs in {str(lib_path)}: {available_model_libs}\n" + f"\tThis can happen when we manually specified model_lib_path_for_prepare_libs" + f" in {str(package_config_path)}\n" + f"\tConsider remove model_lib_path_for_prepare_libs (so library can be jitted)" + "or check the compile command" + ) + logger.info(log_msg) + error_happened = True + + if not error_happened: + logger.info(style.green("Validation pass")) + else: + logger.info(style.red("Validation failed")) + sys.exit(255) + + +def build_android_binding(mlc_llm_home: Path, output: Path) -> None: + """Build android binding in MLC LLM""" + mlc4j_path = mlc_llm_home / "android" / "mlc4j" + + # Move the model libraries to "build/lib/" for linking + os.makedirs(Path("build") / "lib", exist_ok=True) + src_path = str(output / "lib" / "libmodel_android.a") + dst_path = str(Path("build") / "lib" / "libmodel_android.a") + logger.info('Moving "%s" to "%s"', src_path, dst_path) + shutil.move(src_path, dst_path) + + # Build mlc4j + logger.info("Building mlc4j") + subprocess.run([sys.executable, mlc4j_path / "prepare_libs.py"], check=True, env=os.environ) + # Copy built files back to output directory. + lib_path = output / "lib" / "mlc4j" + os.makedirs(lib_path, exist_ok=True) + logger.info('Clean up all directories under "%s"', str(lib_path)) + for content_path in lib_path.iterdir(): + if content_path.is_dir(): + shutil.rmtree(content_path) + + src_path = str(mlc4j_path / "src") + dst_path = str(lib_path / "src") + logger.info('Copying "%s" to "%s"', src_path, dst_path) + shutil.copytree(src_path, dst_path) + + src_path = str(mlc4j_path / "build.gradle") + dst_path = str(lib_path / "build.gradle") + logger.info('Copying "%s" to "%s"', src_path, dst_path) + shutil.copy(src_path, dst_path) + + src_path = str(Path("build") / "output") + dst_path = str(lib_path / "output") + logger.info('Copying "%s" to "%s"', src_path, dst_path) + shutil.copytree(src_path, dst_path) + + os.makedirs(lib_path / "src" / "main" / "assets") + src_path = str(output / "bundle" / "mlc-app-config.json") + dst_path = str(lib_path / "src" / "main" / "assets" / "mlc-app-config.json") + logger.info('Moving "%s" to "%s"', src_path, dst_path) + shutil.move(src_path, dst_path) + + +def build_iphone_binding(mlc_llm_home: Path, output: Path) -> None: + """Build iOS binding in MLC LLM""" + # Build iphone binding + logger.info("Build iphone binding") + subprocess.run(["bash", mlc_llm_home / "ios" / "prepare_libs.sh"], check=True, env=os.environ) + + # Copy built libraries back to output directory. 
+ for static_library in (Path("build") / "lib").iterdir(): + dst_path = str(output / "lib" / static_library.name) + logger.info('Copying "%s" to "%s"', static_library, dst_path) + shutil.copy(static_library, dst_path) + + +def package( + package_config_path: Path, + mlc_llm_home: Path, + output: Path, +) -> None: + """Python entrypoint of package.""" + logger.info('MLC LLM HOME: "%s"', mlc_llm_home) + + # - Read package config. + with open(package_config_path, "r", encoding="utf-8") as file: + package_config = json.load(file) + if not isinstance(package_config, dict): + raise ValueError( + "The content of MLC package config is expected to be a dict with " + f'field "model_list". However, the content of "{package_config_path}" is not a dict.' + ) + + # - Read device. + if "device" not in package_config: + raise ValueError(f'JSON file "{package_config_path}" is required to have field "device".') + device = package_config["device"] + if device not in SUPPORTED_DEVICES: + raise ValueError( + f'The "device" field of JSON file {package_config_path} is expected to be one of ' + f'{SUPPORTED_DEVICES}, while "{device}" is given in the JSON.' + ) + bundle_dir = output / "bundle" + app_config_path = bundle_dir / "mlc-app-config.json" + # - Build model libraries. + model_lib_path_for_prepare_libs = build_model_library( + package_config, device, bundle_dir, app_config_path + ) # - Validate model libraries. validate_model_lib( app_config_path, package_config_path, model_lib_path_for_prepare_libs, device, output ) + + # - Copy model libraries + if device == "android": + build_android_binding(mlc_llm_home, output) + elif device == "iphone": + build_iphone_binding(mlc_llm_home, output) + else: + assert False, "Cannot reach here" + + logger.info("All finished.")
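Taken together, these changes make ``mlc_llm package`` a self-contained entry point: the target platform is read from the ``"device"`` field of ``mlc-package-config.json``, the MLC LLM source tree is located through ``--mlc-llm-home`` or the ``MLC_LLM_HOME`` environment variable, and ``--package-config`` and ``--output`` fall back to ``mlc-package-config.json`` and ``dist``. The sketch below illustrates the end-to-end flow under these defaults; the paths and the single model entry are illustrative placeholders, not values taken from this patch.

.. code:: python

    """Illustrative sketch of driving the reworked ``mlc_llm package`` flow.

    Assumptions: the paths below are placeholders and the model entry is only
    an example; adapt both to your own project.
    """
    import json
    import os
    import subprocess
    from pathlib import Path

    # Point MLC_LLM_HOME at a local clone of mlc-llm; the CLI raises an error
    # when neither --mlc-llm-home nor this environment variable is set.
    os.environ["MLC_LLM_HOME"] = "/path/to/mlc-llm"

    # The "device" field now lives in the package config instead of being
    # passed as a --device command line flag.
    package_config = {
        "device": "android",  # or "iphone"
        "model_list": [
            {
                "model": "HF://mlc-ai/gemma-2b-it-q4f16_1-MLC",
                "bundle_weight": True,  # copy the weights into dist/bundle/
            }
        ],
    }
    Path("mlc-package-config.json").write_text(json.dumps(package_config, indent=2))

    # Equivalent to a plain `mlc_llm package`, since --package-config defaults
    # to "mlc-package-config.json" and --output defaults to "dist".
    subprocess.run(
        ["mlc_llm", "package",
         "--package-config", "mlc-package-config.json",
         "--output", "dist"],
        check=True,
    )
    # Afterwards, dist/lib holds the platform libraries; for iOS the bundled
    # weights and mlc-app-config.json stay under dist/bundle, while for Android
    # mlc-app-config.json is moved into dist/lib/mlc4j/src/main/assets.

Keeping the device choice inside the JSON config means the same command works unchanged for both the iOS and Android projects, which is why the per-project ``prepare_package.sh`` wrappers could be removed in this patch.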