diff --git a/.pins/torchao-pin.txt b/.pins/torchao-pin.txt
new file mode 100644
index 000000000..a3402d40c
--- /dev/null
+++ b/.pins/torchao-pin.txt
@@ -0,0 +1 @@
+85d03de43160328eaf350e7ec3877d3d7b57da50
diff --git a/generate.py b/generate.py
index fc48375d2..57004b057 100644
--- a/generate.py
+++ b/generate.py
@@ -395,6 +395,13 @@ def decode_n_tokens(
                 )
                 input_pos += 1
                 break
+            if _i == 1:
+                t0 = time.time()
+            if _i == num_new_tokens - 2 and _i > 1:
+                t1 = time.time()
+                print(f"\nTime to generate {num_new_tokens-2} tokens: {t1-t0}")
+                print(f"\nTokens/sec to generate {num_new_tokens-2} tokens: {(num_new_tokens-2) / (t1-t0)}")
+
 
         if not encountered_eos:
             eos_token = torch.tensor(
diff --git a/quantization/quantize.py b/quantization/quantize.py
index 8efc4fa08..a1232327e 100644
--- a/quantization/quantize.py
+++ b/quantization/quantize.py
@@ -92,9 +92,21 @@ def quantize_model(
 
         try:
             # Easier to ask forgiveness than permission
-            quant_handler = ao_quantizer_class_dict[quantizer](
-                groupsize=q_kwargs["groupsize"], device=device, precision=precision
-            )
+            if quantizer == "linear:a8wlow":
+                quant_handler = ao_quantizer_class_dict[quantizer](
+                    device=device,
+                    precision=precision,
+                    bitwidth=q_kwargs.get("bitwidth", 4),
+                    groupsize=q_kwargs.get("groupsize", 128),
+                    has_weight_zeros=q_kwargs.get("has_weight_zeros", False),
+                    squeeze_unsqueeze_dim0=True,
+                )
+            else:
+                quant_handler = ao_quantizer_class_dict[quantizer](
+                    groupsize=q_kwargs["groupsize"],
+                    device=device,
+                    precision=precision,
+                )
         except TypeError as e:
             if "unexpected keyword argument 'device'" in str(e):
                 quant_handler = ao_quantizer_class_dict[quantizer](
@@ -581,3 +593,33 @@ def quantized_model(self) -> nn.Module:
     "linear:int4": Int4WeightOnlyQuantizer,
     "linear:a8w4dq": Int8DynActInt4WeightQuantizer,
 }
+
+try:
+    import importlib.util
+    import sys
+    import os
+    torchao_build_path = f"{os.getcwd()}/torchao-build"
+
+    # Load quantizer
+    torchao_experimental_spec = importlib.util.spec_from_file_location(
+        "torchao_experimental",
+        f"{torchao_build_path}/src/ao/torchao/experimental/kernels/cpu/linear/examples/torch_custom_op/torch_custom_op.py",
+    )
+    torchao_experimental = importlib.util.module_from_spec(torchao_experimental_spec)
+    sys.modules["torchao_experimental"] = torchao_experimental
+    torchao_experimental_spec.loader.exec_module(torchao_experimental)
+    from torchao_experimental import Int8DynActLowbitWeightQuantizer
+    ao_quantizer_class_dict["linear:a8wlow"] = Int8DynActLowbitWeightQuantizer
+
+    # Try loading custom op
+    try:
+        import glob
+        libs = glob.glob(f"{torchao_build_path}/cmake-out/liblowbit_op_aten.*")
+        libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs))
+        torch.ops.load_library(libs[0])
+    except Exception as e:
+        print("Failed to load custom ops : ", e)
+        print("Slow fallback kernels will be used.")
+
+except Exception as e:
+    print(f"Failed to use torchao_experimental kernels: {e}")
diff --git a/runner/aoti.cmake b/runner/aoti.cmake
index 156e9bcce..24ec6e505 100644
--- a/runner/aoti.cmake
+++ b/runner/aoti.cmake
@@ -28,3 +28,6 @@ if(Torch_FOUND)
   target_link_libraries(aoti_run "${TORCH_LIBRARIES}" m)
   set_property(TARGET aoti_run PROPERTY CXX_STANDARD 17)
 endif()
+
+
+target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/liblowbit_op_aten${CMAKE_SHARED_LIBRARY_SUFFIX}")
diff --git a/runner/et.cmake b/runner/et.cmake
index 7fc16b1f2..6eeb8f59b 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -129,3 +129,5 @@ if(executorch_FOUND)
 else()
   MESSAGE(WARNING "ExecuTorch package not found")
 endif()
+
+target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/liblowbit_op_executorch${CMAKE_SHARED_LIBRARY_SUFFIX}")
diff --git a/scripts/build_native.sh b/scripts/build_native.sh
index 6ceea0aee..7e7574aed 100755
--- a/scripts/build_native.sh
+++ b/scripts/build_native.sh
@@ -60,6 +60,10 @@ if [ -z "${ET_BUILD_DIR}" ]; then
   ET_BUILD_DIR="et-build"
 fi
 
+if [ -z "${TORCHAO_BUILD_DIR}" ]; then
+  TORCHAO_BUILD_DIR="torchao-build"
+fi
+
 source "$TORCHCHAT_ROOT/scripts/install_utils.sh"
 
 pushd ${TORCHCHAT_ROOT}
@@ -70,6 +74,10 @@ if [[ "$TARGET" == "et" ]]; then
   install_pip_dependencies
   clone_executorch
   install_executorch_libs false
+
+  EXECUTORCH_INCLUDE_DIRS=${TORCHCHAT_ROOT}/et-build/src
+  EXECUTORCH_LIBRARIES=${TORCHCHAT_ROOT}/et-build/install/lib/libexecutorch_no_prim_ops.a
+  install_torchao_custom_executorch_ops
 fi
 popd
 
diff --git a/scripts/build_torchao_custom_ops.sh b/scripts/build_torchao_custom_ops.sh
new file mode 100644
index 000000000..cf0626afb
--- /dev/null
+++ b/scripts/build_torchao_custom_ops.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+if [ -z "${TORCHCHAT_ROOT}" ]; then
+  # Get the absolute path of the current script
+  SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
+  # Get the absolute path of the parent directory
+  TORCHCHAT_ROOT="$(dirname "$SCRIPT_PATH")"
+fi
+
+if [ -z "${TORCHAO_BUILD_DIR}" ]; then
+  TORCHAO_BUILD_DIR="torchao-build"
+fi
+
+source "$TORCHCHAT_ROOT/scripts/install_utils.sh"
+
+find_cmake_prefix_path
+clone_torchao
+install_torchao_custom_aten_ops
diff --git a/scripts/install_utils.sh b/scripts/install_utils.sh
index 3b3ad4926..a7badf720 100644
--- a/scripts/install_utils.sh
+++ b/scripts/install_utils.sh
@@ -124,3 +124,54 @@
 install_executorch_libs() {
   install_executorch_python_libs $1
 }
+
+clone_torchao() {
+  echo "Cloning torchao to ${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/src"
+  rm -rf ${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/src
+  mkdir -p ${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/src
+  pushd ${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/src
+  echo $PWD
+
+  git clone https://github.com/pytorch/ao.git
+  pushd ao
+  git checkout $(cat ${TORCHCHAT_ROOT}/.pins/torchao-pin.txt)
+  popd
+
+  popd
+}
+
+install_torchao_custom_aten_ops() {
+  echo "Installing custom torchao ops"
+  pushd ${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/src/ao/torchao/experimental/kernels/cpu/linear/examples/torch_custom_op
+  export TORCHAO_INCLUDE_DIRS=${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/src/ao
+
+  if [ "${CMAKE_OUT_DIR}" == "" ]; then
+    CMAKE_OUT_DIR="${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/cmake-out"
+  fi
+
+  cmake -DTORCHAO_INCLUDE_DIRS=${TORCHAO_INCLUDE_DIRS} \
+    -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
+    -DPLATFORM="ATEN" \
+    -S . \
+    -B ${CMAKE_OUT_DIR} -G Ninja
+  cmake --build ${CMAKE_OUT_DIR}
+}
+
+install_torchao_custom_executorch_ops() {
+  echo "Installing custom torchao ops"
+  pushd ${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/src/ao/torchao/experimental/kernels/cpu/linear/examples/torch_custom_op
+  export TORCHAO_INCLUDE_DIRS=${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/src/ao
+
+  if [ "${CMAKE_OUT_DIR}" == "" ]; then
+    CMAKE_OUT_DIR="${TORCHCHAT_ROOT}/${TORCHAO_BUILD_DIR}/cmake-out"
+  fi
+
+  cmake -DTORCHAO_INCLUDE_DIRS=${TORCHAO_INCLUDE_DIRS} \
+    -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
+    -DEXECUTORCH_INCLUDE_DIRS=${EXECUTORCH_INCLUDE_DIRS} \
+    -DEXECUTORCH_LIBRARIES=${EXECUTORCH_LIBRARIES} \
+    -DPLATFORM="EXECUTORCH" \
+    -S . \
+    -B ${CMAKE_OUT_DIR} -G Ninja
+  cmake --build ${CMAKE_OUT_DIR}
+}