diff --git a/.bazelversion b/.bazelversion
index fae6e3d04..0062ac971 100644
--- a/.bazelversion
+++ b/.bazelversion
@@ -1 +1 @@
-4.2.1
+5.0.0
diff --git a/.github/tools/release_linux.sh b/.github/tools/release_linux.sh
index f1ee04254..2fd22064f 100755
--- a/.github/tools/release_linux.sh
+++ b/.github/tools/release_linux.sh
@@ -9,10 +9,10 @@ bazel build :build_pip_pkg \
   --copt=-mavx \
   --distinct_host_configuration=false \
   --verbose_failures \
-  --crosstool_top=//third_party/toolchains/gcc7_manylinux2010-nvcc-cuda11:toolchain
+  --crosstool_top=@ubuntu20.04-gcc9_manylinux2014-cuda11.2-cudnn8.1-tensorrt7.2_config_cuda//crosstool:toolchain
 
 # Package Whl
 bazel-bin/build_pip_pkg artifacts
 
-# Remove manylinux2010 config flags so that normal builds work as expected
+# Remove manylinux2014 config flags so that normal builds work as expected
 rm -f .lce_configure.bazelrc
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index e78f16a19..fe3375d98 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -213,7 +213,7 @@ jobs:
           path: wheelhouse
 
   manylinux-release-wheel:
-    name: Build release wheels for manylinux2010
+    name: Build release wheels for manylinux2014
     runs-on: ubuntu-18.04
     strategy:
      matrix:
@@ -228,7 +228,7 @@ jobs:
        continue-on-error: true
        with:
          credentials_json: ${{ secrets.gcs_bazel_cache }}
-      - name: Build manylinux2010 wheels
+      - name: Build manylinux2014 wheels
        run: |
          if [[ -n $GOOGLE_APPLICATION_CREDENTIALS ]]; then
            echo -e 'build --remote_http_cache=https://storage.googleapis.com/plumerai-bazel-cache/lce-release-manylinux-python${{ matrix.python-version }}' >> .bazelrc.user
@@ -239,14 +239,14 @@ jobs:
            -e GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcloud-credentials.json \
            -v $GOOGLE_APPLICATION_CREDENTIALS:/tmp/gcloud-credentials.json:ro \
            -v ${PWD}:/compute-engine -w /compute-engine \
-            tensorflow/build:2.8-python${{ matrix.python-version }} \
+            tensorflow/build:2.9-python${{ matrix.python-version }} \
            .github/tools/release_linux.sh
 
          sudo apt-get -y -qq install patchelf --no-install-recommends
          python -m pip install auditwheel --no-cache-dir
          for f in artifacts/*.whl; do
-            auditwheel repair --plat manylinux2010_x86_64 $f
+            auditwheel repair --plat manylinux2014_x86_64 $f
          done
          ls -al wheelhouse/
diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
index 2cf449b92..209e0791e 100644
--- a/.github/workflows/unittests.yml
+++ b/.github/workflows/unittests.yml
@@ -87,7 +87,7 @@ jobs:
        if: github.ref != 'refs/heads/main'
        shell: bash
      - name: Install pip dependencies
-        run: pip install tensorflow-cpu~=2.8.0 larq~=0.11 larq_zoo~=2.0 pytest tensorflow_datasets~=4.4 flatbuffers==1.12 tqdm --no-cache-dir
+        run: pip install tensorflow-cpu~=2.9.0 larq~=0.11 larq_zoo~=2.0 pytest tensorflow_datasets~=4.4 flatbuffers==1.12 tqdm --no-cache-dir
      - name: Run Interpreter test
        run: bazelisk test larq_compute_engine/tflite/tests:interpreter_test --test_output=all
      - name: Run FileCheck tests
@@ -101,7 +101,7 @@
    runs-on: ubuntu-latest
    strategy:
      matrix:
-        tf-version: [1.14.0, 1.15.5, 2.0.4, 2.1.4, 2.2.3, 2.3.3, 2.4.4, 2.5.3, 2.6.3, 2.7.1, 2.8.0]
+        tf-version: [1.14.0, 1.15.5, 2.0.4, 2.1.4, 2.2.3, 2.3.3, 2.4.4, 2.5.3, 2.6.4, 2.7.2, 2.8.1, 2.9.0]
    if: "!contains(github.event.head_commit.message, 'ci-skip')"
    steps:
      - uses: actions/checkout@v3
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e0f5d38ee..cf4e429d3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -67,26 +67,10 @@ if(COMPILE_BENCHMARK)
     ${LCE_SOURCE_DIR}/tflite/benchmark/lce_benchmark_tflite_model.h
     ${TFLITE_SOURCE_DIR}/tools/benchmark/benchmark_model.h
   )
-  set(TFLITE_BENCHMARK_SRCS # from ${TFLITE_SOURCE_DIR}/tools/benchmark/CMakeLists.txt
-    ${TENSORFLOW_SOURCE_DIR}/tensorflow/core/util/stats_calculator.cc
-    ${TFLITE_SOURCE_DIR}/kernels/internal/utils/sparsity_format_converter.cc
-    ${TFLITE_SOURCE_DIR}/profiling/memory_info.cc
-    ${TFLITE_SOURCE_DIR}/profiling/memory_usage_monitor.cc
-    ${TFLITE_SOURCE_DIR}/profiling/profile_summarizer.cc
-    ${TFLITE_SOURCE_DIR}/profiling/profile_summary_formatter.cc
-    ${TFLITE_SOURCE_DIR}/profiling/time.cc
-    ${TFLITE_SOURCE_DIR}/tools/command_line_flags.cc
-    ${TFLITE_SOURCE_DIR}/tools/benchmark/benchmark_model.cc
-    ${TFLITE_SOURCE_DIR}/tools/benchmark/benchmark_performance_options.cc
-    ${TFLITE_SOURCE_DIR}/tools/benchmark/benchmark_tflite_model.cc
-    ${TFLITE_SOURCE_DIR}/tools/benchmark/benchmark_utils.cc
-    ${TFLITE_SOURCE_DIR}/tools/benchmark/profiling_listener.cc
-    ${TFLITE_SOURCE_DIR}/tools/delegates/default_execution_provider.cc
-    ${TFLITE_SOURCE_DIR}/tools/delegates/delegate_provider.cc
-    ${TFLITE_SOURCE_DIR}/tools/delegates/xnnpack_delegate_provider.cc
-    ${TFLITE_SOURCE_DIR}/tools/evaluation/utils.cc
-    ${TFLITE_SOURCE_DIR}/tools/tool_params.cc
-  )
+
+  get_directory_property(TFLITE_BENCHMARK_SRCS DIRECTORY ${TFLITE_SOURCE_DIR}/tools/benchmark DEFINITION TFLITE_BENCHMARK_SRCS)
+  list(FILTER TFLITE_BENCHMARK_SRCS EXCLUDE REGEX benchmark_main.cc)
+
   add_executable(lce_benchmark_model
     ${TFLITE_BENCHMARK_SRCS}
     ${LCE_CORE_SRCS}
     ${LCE_CORE_HDRS}
diff --git a/Dockerfile b/Dockerfile
index 895cfa401..4489c919a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,6 +8,5 @@ RUN pip install six numpy --no-cache-dir
 WORKDIR /compute-engine
 COPY . .
 RUN ./third_party/install_android.sh
-ENV MANYLINUX2010=1
 RUN ./configure.py
 RUN bazelisk --version
diff --git a/WORKSPACE b/WORKSPACE
index b51870e52..dabd97ae7 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -17,10 +17,10 @@ http_archive(
         "//third_party/tensorflow_patches:disable_forced_mkl.patch",
         "//third_party/tensorflow_patches:fix_armhf_xnnpack.patch",
     ],
-    sha256 = "66b953ae7fba61fd78969a2e24e350b26ec116cf2e6a7eb93d02c63939c6f9f7",
-    strip_prefix = "tensorflow-2.8.0",
+    sha256 = "8087cb0c529f04a4bfe480e49925cd64a904ad16d8ec66b98e2aacdfd53c80ff",
+    strip_prefix = "tensorflow-2.9.0",
     urls = [
-        "https://github.com/tensorflow/tensorflow/archive/v2.8.0.tar.gz",
+        "https://github.com/tensorflow/tensorflow/archive/v2.9.0.tar.gz",
     ],
 )
diff --git a/larq_compute_engine/mlir/BUILD b/larq_compute_engine/mlir/BUILD
index cd1e4f6b3..a78326f05 100644
--- a/larq_compute_engine/mlir/BUILD
+++ b/larq_compute_engine/mlir/BUILD
@@ -32,7 +32,7 @@ td_library(
     srcs = ["transforms/op_removal_patterns.td"],
     includes = ["/external/org_tensorflow"],
     deps = [
-        "@llvm-project//mlir:StdOpsTdFiles",
+        "@llvm-project//mlir:FuncTdFiles",
         "@org_tensorflow//tensorflow/compiler/mlir/tensorflow:tensorflow_ops_td_files",
     ],
 )
@@ -43,7 +43,7 @@ td_library(
     includes = ["/external/org_tensorflow"],
     deps = [
         ":lce_ops_td_file",
-        "@llvm-project//mlir:StdOpsTdFiles",
+        "@llvm-project//mlir:FuncTdFiles",
         "@org_tensorflow//tensorflow/compiler/mlir/lite:tensorflow_lite_ops_td_files",
     ],
 )
@@ -54,7 +54,7 @@ td_library(
     includes = ["/external/org_tensorflow"],
     deps = [
         ":lce_ops_td_file",
-        "@llvm-project//mlir:StdOpsTdFiles",
+        "@llvm-project//mlir:FuncTdFiles",
         "@org_tensorflow//tensorflow/compiler/mlir/lite:tensorflow_lite_ops_td_files",
"@org_tensorflow//tensorflow/compiler/mlir/tensorflow:tensorflow_ops_td_files", ], @@ -182,7 +182,7 @@ gentbl_cc_library( td_file = "transforms/bitpack_activations_patterns.td", deps = [ ":lce_ops_td_file", - "@llvm-project//mlir:StdOpsTdFiles", + "@llvm-project//mlir:FuncTdFiles", "@org_tensorflow//tensorflow/compiler/mlir/lite:tensorflow_lite_ops_td_files", ], ) @@ -199,7 +199,7 @@ gentbl_cc_library( td_file = "transforms/bitpack_weights_patterns.td", deps = [ ":lce_ops_td_file", - "@llvm-project//mlir:StdOpsTdFiles", + "@llvm-project//mlir:FuncTdFiles", "@org_tensorflow//tensorflow/compiler/mlir/lite:tensorflow_lite_ops_td_files", "@org_tensorflow//tensorflow/compiler/mlir/tensorflow:tensorflow_ops_td_files", ], @@ -288,7 +288,7 @@ cc_library( "transforms/passes.h", ], deps = [ - "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:FuncDialect", "@org_tensorflow//tensorflow/compiler/mlir/tensorflow", ], alwayslink = 1, @@ -308,7 +308,7 @@ cc_library( deps = [ ":larq_compute_engine", "//larq_compute_engine/core:types", - "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:FuncDialect", "@org_tensorflow//tensorflow/compiler/mlir/lite:tensorflow_lite", "@org_tensorflow//tensorflow/compiler/mlir/lite:tensorflow_lite_legalize_tf", "@org_tensorflow//tensorflow/compiler/mlir/lite:validators", @@ -429,7 +429,7 @@ cc_library( "transforms/passes.h", ], deps = [ - "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:FuncDialect", ], alwayslink = 1, ) diff --git a/larq_compute_engine/mlir/ir/lce_ops.td b/larq_compute_engine/mlir/ir/lce_ops.td index 9db54dab4..3f58365c9 100644 --- a/larq_compute_engine/mlir/ir/lce_ops.td +++ b/larq_compute_engine/mlir/ir/lce_ops.td @@ -56,7 +56,7 @@ def LarqDialect : Dialect { //===----------------------------------------------------------------------===// // Base class for the operation in this dialect -class LQ_Op traits = []> : +class LQ_Op traits = []> : Op { let extraClassDeclaration = [{ diff --git a/larq_compute_engine/mlir/lce_mlir_opt.cc b/larq_compute_engine/mlir/lce_mlir_opt.cc index a1ef47e25..f3f9b5f97 100644 --- a/larq_compute_engine/mlir/lce_mlir_opt.cc +++ b/larq_compute_engine/mlir/lce_mlir_opt.cc @@ -1,7 +1,7 @@ #include "larq_compute_engine/mlir/ir/lce_ops.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Quant/QuantOps.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" -#include "mlir/Support/MlirOptMain.h" +#include "mlir/Tools/mlir-opt/MlirOptMain.h" #include "mlir/Transforms/Passes.h" #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" @@ -9,10 +9,9 @@ int main(int argc, char** argv) { mlir::registerTransformsPasses(); mlir::DialectRegistry registry; - registry.insert(); - return failed(mlir::MlirOptMain(argc, argv, - "Larq Compute Engine pass driver\n", registry, - /*preloadDialectsInContext=*/false)); + return failed(mlir::MlirOptMain( + argc, argv, "Larq Compute Engine pass driver\n", registry)); } diff --git a/larq_compute_engine/mlir/python/common.cc b/larq_compute_engine/mlir/python/common.cc index 9f3c2548b..78a7e836e 100644 --- a/larq_compute_engine/mlir/python/common.cc +++ b/larq_compute_engine/mlir/python/common.cc @@ -36,10 +36,11 @@ LCETarget GetLCETarget(const std::string& target_str) { } } -Status GetNumInputs(mlir::OwningModuleRef* module, int* num_inputs) { +Status GetNumInputs(mlir::OwningOpRef* module, + int* num_inputs) { *num_inputs = 0; - mlir::FuncOp entry_function = nullptr; - for (auto func : module->get().getOps()) { + 
mlir::func::FuncOp entry_function = nullptr; + for (auto func : module->get().getOps()) { if (auto tf_attrs = func->getAttrOfType("tf.entry_function")) { // TODO(jaesung): There could be multiple entry functions. Let's handle @@ -70,13 +71,13 @@ Status GetNumInputs(mlir::OwningModuleRef* module, int* num_inputs) { } pybind11::bytes ConvertMLIRModuleToTFLiteFlatBuffer( - mlir::OwningModuleRef* module, mlir::MLIRContext& context, + mlir::OwningOpRef* module, mlir::MLIRContext& context, const LCETarget target, const pybind11::object& default_ranges, const std::unordered_set& saved_model_tags, llvm::StringRef saved_model_dir, llvm::Optional session, const int num_inputs, const bool should_quantize, const bool mark_as_post_training_quant) { - mlir::TFL::QuantizationSpecs quant_specs; + mlir::quant::QuantizationSpecs quant_specs; if (should_quantize) { // Normally we'd only set `inference_type` to QINT8 when there are // fake_quant nodes in the graph. However this did not work reliably, and diff --git a/larq_compute_engine/mlir/python/common.h b/larq_compute_engine/mlir/python/common.h index 72e661191..be21ac015 100644 --- a/larq_compute_engine/mlir/python/common.h +++ b/larq_compute_engine/mlir/python/common.h @@ -9,10 +9,10 @@ namespace tensorflow { LCETarget GetLCETarget(const std::string& target_str); -Status GetNumInputs(mlir::OwningModuleRef* module, int* num_inputs); +Status GetNumInputs(mlir::OwningOpRef* module, int* num_inputs); pybind11::bytes ConvertMLIRModuleToTFLiteFlatBuffer( - mlir::OwningModuleRef* module, mlir::MLIRContext& context, + mlir::OwningOpRef* module, mlir::MLIRContext& context, const LCETarget target, const pybind11::object& default_ranges, const std::unordered_set& saved_model_tags, llvm::StringRef saved_model_dir, diff --git a/larq_compute_engine/mlir/tests/bitpack-weights.mlir b/larq_compute_engine/mlir/tests/bitpack-weights.mlir index 86fbdd696..bb419438d 100644 --- a/larq_compute_engine/mlir/tests/bitpack-weights.mlir +++ b/larq_compute_engine/mlir/tests/bitpack-weights.mlir @@ -1,7 +1,7 @@ // RUN: lce-tf-opt %s -tfl-lce-bitpack-weights -verify-diagnostics | FileCheck %s // CHECK-LABEL: @bitpack_bconv2d_filters -func @bitpack_bconv2d_filters(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: none) -> tensor<256x30x30x16xf32> { +func.func @bitpack_bconv2d_filters(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: none) -> tensor<256x30x30x16xf32> { %cst = arith.constant dense<1.0> : tensor<16x3x3x3xf32> %0 = "lq.Bconv2d"(%arg0, %cst, %arg1, %arg2, %arg3) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", padding = "VALID", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<16x3x3x3xf32>, tensor<16xf32>, tensor<16xf32>, none) -> tensor<256x30x30x16xf32> return %0 : tensor<256x30x30x16xf32> diff --git a/larq_compute_engine/mlir/tests/const-fold.mlir b/larq_compute_engine/mlir/tests/const-fold.mlir index 034f51360..db8ded4e7 100644 --- a/larq_compute_engine/mlir/tests/const-fold.mlir +++ b/larq_compute_engine/mlir/tests/const-fold.mlir @@ -1,7 +1,7 @@ // RUN: lce-tf-opt %s -canonicalize | FileCheck %s // CHECK-LABEL: @quantize -func @quantize() -> (tensor<1x1x2x1xi32>, tensor<1x1x2x1xi32>) { +func.func @quantize() -> (tensor<1x1x2x1xi32>, tensor<1x1x2x1xi32>) { %pos = arith.constant dense< 0.5> : tensor<1x1x2x32xf32> %neg = arith.constant dense<-0.5> : tensor<1x1x2x32xf32> %0 = 
"lq.Quantize"(%pos) {} : (tensor<1x1x2x32xf32>) -> tensor<1x1x2x1xi32> @@ -14,7 +14,7 @@ func @quantize() -> (tensor<1x1x2x1xi32>, tensor<1x1x2x1xi32>) { } // CHECK-LABEL: @dequantize -func @dequantize() -> (tensor<1x1x2x32xf32>, tensor<1x1x2x32xf32>) { +func.func @dequantize() -> (tensor<1x1x2x32xf32>, tensor<1x1x2x32xf32>) { %pos = arith.constant dense<0> : tensor<1x1x2x1xi32> %neg = arith.constant dense<-1> : tensor<1x1x2x1xi32> %0 = "lq.Dequantize"(%pos) {} : (tensor<1x1x2x1xi32>) -> tensor<1x1x2x32xf32> diff --git a/larq_compute_engine/mlir/tests/fuse_padding.mlir b/larq_compute_engine/mlir/tests/fuse_padding.mlir index 10e9544d2..6e8c63a80 100644 --- a/larq_compute_engine/mlir/tests/fuse_padding.mlir +++ b/larq_compute_engine/mlir/tests/fuse_padding.mlir @@ -1,7 +1,7 @@ // RUN: lce-tf-opt %s -tfl-fuse-padding -verify-diagnostics | FileCheck %s // CHECK-LABEL: @fuse_pad_into_conv_valid -func @fuse_pad_into_conv_valid(%arg0: tensor<1x64x64x8xf32>) -> tensor<1x64x64x16xf32> { +func.func @fuse_pad_into_conv_valid(%arg0: tensor<1x64x64x8xf32>) -> tensor<1x64x64x16xf32> { %cst0 = arith.constant dense<[[0, 0], [1, 1], [1, 1], [0, 0]]> : tensor<4x2xi32> %cst1 = arith.constant dense<1.0> : tensor<16x3x3x8xf32> %cst2 = arith.constant dense<1.0> : tensor<16xf32> @@ -14,7 +14,7 @@ func @fuse_pad_into_conv_valid(%arg0: tensor<1x64x64x8xf32>) -> tensor<1x64x64x1 } // CHECK-LABEL: @fuse_padv2_into_conv_valid -func @fuse_padv2_into_conv_valid(%arg0: tensor<1x64x64x8xf32>) -> tensor<1x64x64x16xf32> { +func.func @fuse_padv2_into_conv_valid(%arg0: tensor<1x64x64x8xf32>) -> tensor<1x64x64x16xf32> { %cst0 = arith.constant dense<[[0, 0], [1, 1], [1, 1], [0, 0]]> : tensor<4x2xi32> %cst1 = arith.constant dense<0.0> : tensor %cst2 = arith.constant dense<1.0> : tensor<16x3x3x8xf32> @@ -28,7 +28,7 @@ func @fuse_padv2_into_conv_valid(%arg0: tensor<1x64x64x8xf32>) -> tensor<1x64x64 } // CHECK-LABEL: @fuse_pad_into_dwconv_valid -func @fuse_pad_into_dwconv_valid(%arg0: tensor<1x64x64x16xf32>) -> tensor<1x64x64x16xf32> { +func.func @fuse_pad_into_dwconv_valid(%arg0: tensor<1x64x64x16xf32>) -> tensor<1x64x64x16xf32> { %cst0 = arith.constant dense<[[0, 0], [1, 1], [1, 1], [0, 0]]> : tensor<4x2xi32> %cst1 = arith.constant dense<1.0> : tensor<1x3x3x16xf32> %cst2 = arith.constant dense<1.0> : tensor<16xf32> @@ -41,7 +41,7 @@ func @fuse_pad_into_dwconv_valid(%arg0: tensor<1x64x64x16xf32>) -> tensor<1x64x6 } // CHECK-LABEL: @do_not_fuse_padv2_into_conv_wrong_pad_value -func @do_not_fuse_padv2_into_conv_wrong_pad_value(%arg0: tensor<1x64x64x8xf32>) -> tensor<1x64x64x16xf32> { +func.func @do_not_fuse_padv2_into_conv_wrong_pad_value(%arg0: tensor<1x64x64x8xf32>) -> tensor<1x64x64x16xf32> { %cst0 = arith.constant dense<[[0, 0], [1, 1], [1, 1], [0, 0]]> : tensor<4x2xi32> %cst1 = arith.constant dense<1.0> : tensor %cst2 = arith.constant dense<1.0> : tensor<16x3x3x8xf32> @@ -54,7 +54,7 @@ func @do_not_fuse_padv2_into_conv_wrong_pad_value(%arg0: tensor<1x64x64x8xf32>) } // CHECK-LABEL: @do_not_fuse_pad_into_conv_same -func @do_not_fuse_pad_into_conv_same(%arg0: tensor<1x64x64x8xf32>) -> tensor<1x66x66x16xf32> { +func.func @do_not_fuse_pad_into_conv_same(%arg0: tensor<1x64x64x8xf32>) -> tensor<1x66x66x16xf32> { %cst0 = arith.constant dense<[[0, 0], [1, 1], [1, 1], [0, 0]]> : tensor<4x2xi32> %cst1 = arith.constant dense<1.0> : tensor %cst2 = arith.constant dense<1.0> : tensor<16x3x3x8xf32> @@ -67,7 +67,7 @@ func @do_not_fuse_pad_into_conv_same(%arg0: tensor<1x64x64x8xf32>) -> tensor<1x6 } // CHECK-LABEL: 
@do_not_fuse_pad_into_dwconv_channelpad -func @do_not_fuse_pad_into_dwconv_channelpad(%arg0: tensor<1x64x64x12xf32>) -> tensor<1x64x64x16xf32> { +func.func @do_not_fuse_pad_into_dwconv_channelpad(%arg0: tensor<1x64x64x12xf32>) -> tensor<1x64x64x16xf32> { %cst0 = arith.constant dense<[[0, 0], [1, 1], [1, 1], [1, 3]]> : tensor<4x2xi32> %cst1 = arith.constant dense<1.0> : tensor<1x3x3x16xf32> %cst2 = arith.constant dense<1.0> : tensor<16xf32> diff --git a/larq_compute_engine/mlir/tests/legalize-lce.mlir b/larq_compute_engine/mlir/tests/legalize-lce.mlir index 3230cbe88..fc24cc087 100644 --- a/larq_compute_engine/mlir/tests/legalize-lce.mlir +++ b/larq_compute_engine/mlir/tests/legalize-lce.mlir @@ -2,7 +2,7 @@ // RUN: lce-tf-opt %s -tfl-legalize-lce -lce-translate-tfl -verify-diagnostics | FileCheck %s --check-prefix=TRANSLATE // CHECK-LABEL: @legalize_bconv2d -func @legalize_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: none) -> tensor<256x30x30x16xf32> { +func.func @legalize_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: none) -> tensor<256x30x30x16xf32> { %0 = "lq.Bconv2d"(%arg0, %arg1, %arg2, %arg3, %arg4) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", padding = "VALID", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<16x3x3x3xf32>, tensor<16xf32>, tensor<16xf32>, none) -> tensor<256x30x30x16xf32> return %0 : tensor<256x30x30x16xf32> @@ -14,7 +14,7 @@ func @legalize_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf3 } // CHECK-LABEL: @legalize_bmax_pool2d -func @legalize_bmax_pool2d(%arg0: tensor<256x32x32x3xi32>) -> tensor<256x16x16x3xi32> { +func.func @legalize_bmax_pool2d(%arg0: tensor<256x32x32x3xi32>) -> tensor<256x16x16x3xi32> { %0 = "lq.BMaxPool2d"(%arg0) {filter_height = 2 : i32, filter_width = 2 : i32, padding = "SAME", stride_height = 2 : i32, stride_width = 2 : i32} : (tensor<256x32x32x3xi32>) -> tensor<256x16x16x3xi32> return %0 : tensor<256x16x16x3xi32> @@ -26,7 +26,7 @@ func @legalize_bmax_pool2d(%arg0: tensor<256x32x32x3xi32>) -> tensor<256x16x16x3 } // CHECK-LABEL: @legalize_quantize -func @legalize_quantize(%arg0: tensor<256x32x32x64xf32>) -> tensor<256x32x32x2xi32> { +func.func @legalize_quantize(%arg0: tensor<256x32x32x64xf32>) -> tensor<256x32x32x2xi32> { %0 = "lq.Quantize"(%arg0) {} : (tensor<256x32x32x64xf32>) -> tensor<256x32x32x2xi32> return %0 : tensor<256x32x32x2xi32> @@ -38,7 +38,7 @@ func @legalize_quantize(%arg0: tensor<256x32x32x64xf32>) -> tensor<256x32x32x2xi } // CHECK-LABEL: @legalize_dequantize -func @legalize_dequantize(%arg0: tensor<256x32x32x2xi32>) -> tensor<256x32x32x64xf32> { +func.func @legalize_dequantize(%arg0: tensor<256x32x32x2xi32>) -> tensor<256x32x32x64xf32> { %0 = "lq.Dequantize"(%arg0) {} : (tensor<256x32x32x2xi32>) -> tensor<256x32x32x64xf32> return %0 : tensor<256x32x32x64xf32> diff --git a/larq_compute_engine/mlir/tests/op-removal.mlir b/larq_compute_engine/mlir/tests/op-removal.mlir index 97390b75d..45e7d6248 100644 --- a/larq_compute_engine/mlir/tests/op-removal.mlir +++ b/larq_compute_engine/mlir/tests/op-removal.mlir @@ -1,7 +1,7 @@ // RUN: lce-tf-opt %s -lce-op-removal-tf -verify-diagnostics | FileCheck %s // CHECK-LABEL: @snapshot -func @snapshot(%arg0: tensor<3xi32>) -> tensor<3xi32> { +func.func @snapshot(%arg0: tensor<3xi32>) -> tensor<3xi32> { %0 = 
"tf.Snapshot"(%arg0) : (tensor<3xi32>) -> tensor<3xi32> return %0 : tensor<3xi32> // Should be converted to Identity and then from Identity to value @@ -9,7 +9,7 @@ func @snapshot(%arg0: tensor<3xi32>) -> tensor<3xi32> { } // CHECK-LABEL: @stop_gradient -func @stop_gradient(%arg0: tensor<3xi32>) -> tensor<3xi32> { +func.func @stop_gradient(%arg0: tensor<3xi32>) -> tensor<3xi32> { %0 = "tf.StopGradient"(%arg0) : (tensor<3xi32>) -> tensor<3xi32> return %0 : tensor<3xi32> // Should be converted to Identity and then from Identity to value @@ -17,7 +17,7 @@ func @stop_gradient(%arg0: tensor<3xi32>) -> tensor<3xi32> { } // CHECK-LABEL: @check_numerics -func @check_numerics(%arg0: tensor<3xf32>) -> tensor<3xf32> { +func.func @check_numerics(%arg0: tensor<3xf32>) -> tensor<3xf32> { %0 = "tf.CheckNumerics"(%arg0) {message = ""}: (tensor<3xf32>) -> tensor<3xf32> return %0 : tensor<3xf32> // Should be converted to Identity and then from Identity to value @@ -25,7 +25,7 @@ func @check_numerics(%arg0: tensor<3xf32>) -> tensor<3xf32> { } // CHECK-LABEL: @placeholder_with_default -func @placeholder_with_default(%arg0: tensor<3xf32>) -> tensor<3xf32> { +func.func @placeholder_with_default(%arg0: tensor<3xf32>) -> tensor<3xf32> { %0 = "tf.PlaceholderWithDefault"(%arg0): (tensor<3xf32>) -> tensor<3xf32> return %0 : tensor<3xf32> // Should be converted to Identity and then from Identity to value @@ -33,7 +33,7 @@ func @placeholder_with_default(%arg0: tensor<3xf32>) -> tensor<3xf32> { } // CHECK-LABEL: @identity -func @identity(%arg0: tensor<10xi32>, %arg1: tensor<20xi32>, %arg2: tensor<30xi32>) -> (tensor<10xi32>, tensor<20xi32>, tensor<30xi32>) { +func.func @identity(%arg0: tensor<10xi32>, %arg1: tensor<20xi32>, %arg2: tensor<30xi32>) -> (tensor<10xi32>, tensor<20xi32>, tensor<30xi32>) { %0 = "tf.Identity"(%arg0) : (tensor<10xi32>) -> tensor<10xi32> %1:2 = "tf.IdentityN"(%arg1,%arg2) : (tensor<20xi32>, tensor<30xi32>) -> (tensor<20xi32>, tensor<30xi32>) return %0, %1#0, %1#1: tensor<10xi32>, tensor<20xi32>, tensor<30xi32> diff --git a/larq_compute_engine/mlir/tests/optimize.mlir b/larq_compute_engine/mlir/tests/optimize.mlir index a5b0c3e8c..c1f0efda8 100644 --- a/larq_compute_engine/mlir/tests/optimize.mlir +++ b/larq_compute_engine/mlir/tests/optimize.mlir @@ -2,7 +2,7 @@ // RUN: lce-tf-opt %s -tfl-optimize-lce=target=xcore -verify-diagnostics | FileCheck %s --check-prefixes CHECK,CHECK-XCORE // CHECK-LABEL: @optimize_quantize_greater_equal_zero -func @optimize_quantize_greater_equal_zero(%arg0: tensor<48x48x64xf32>) -> tensor<48x48x2xi32> { +func.func @optimize_quantize_greater_equal_zero(%arg0: tensor<48x48x64xf32>) -> tensor<48x48x2xi32> { %cst = arith.constant dense<0.0> : tensor %0 = "tfl.greater_equal"(%arg0, %cst) : (tensor<48x48x64xf32>, tensor) -> tensor<48x48x64xi1> %1 = "lq.Quantize"(%0) : (tensor<48x48x64xi1>) -> tensor<48x48x2xi32> @@ -13,7 +13,7 @@ func @optimize_quantize_greater_equal_zero(%arg0: tensor<48x48x64xf32>) -> tenso } // CHECK-LABEL: @optimize_quantize_greater_equal_non_zero -func @optimize_quantize_greater_equal_non_zero(%arg0: tensor<48x48x64xf32>, %arg1: tensor<48x48x64xf32>) -> tensor<48x48x2xi32> { +func.func @optimize_quantize_greater_equal_non_zero(%arg0: tensor<48x48x64xf32>, %arg1: tensor<48x48x64xf32>) -> tensor<48x48x2xi32> { %0 = "tfl.greater_equal"(%arg0, %arg1) : (tensor<48x48x64xf32>, tensor<48x48x64xf32>) -> tensor<48x48x64xi1> %1 = "lq.Quantize"(%0) : (tensor<48x48x64xi1>) -> tensor<48x48x2xi32> return %1 : tensor<48x48x2xi32> @@ -24,7 +24,7 @@ func 
@optimize_quantize_greater_equal_non_zero(%arg0: tensor<48x48x64xf32>, %arg } // CHECK-LABEL: @optimize_quantize_less_equal_zero -func @optimize_quantize_less_equal_zero(%arg0: tensor<48x48x64xf32>) -> tensor<48x48x2xi32> { +func.func @optimize_quantize_less_equal_zero(%arg0: tensor<48x48x64xf32>) -> tensor<48x48x2xi32> { %cst = arith.constant dense<0.0> : tensor<64xf32> %0 = "tfl.less_equal"(%cst, %arg0) : (tensor<64xf32>, tensor<48x48x64xf32>) -> tensor<48x48x64xi1> %1 = "lq.Quantize"(%0) : (tensor<48x48x64xi1>) -> tensor<48x48x2xi32> @@ -35,7 +35,7 @@ func @optimize_quantize_less_equal_zero(%arg0: tensor<48x48x64xf32>) -> tensor<4 } // CHECK-LABEL: @optimize_quantize_less_equal_non_zero -func @optimize_quantize_less_equal_non_zero(%arg0: tensor<48x48x64xf32>, %arg1: tensor<48x48x64xf32>) -> tensor<48x48x2xi32> { +func.func @optimize_quantize_less_equal_non_zero(%arg0: tensor<48x48x64xf32>, %arg1: tensor<48x48x64xf32>) -> tensor<48x48x2xi32> { %0 = "tfl.less_equal"(%arg0, %arg1) : (tensor<48x48x64xf32>, tensor<48x48x64xf32>) -> tensor<48x48x64xi1> %1 = "lq.Quantize"(%0) : (tensor<48x48x64xi1>) -> tensor<48x48x2xi32> return %1 : tensor<48x48x2xi32> @@ -46,7 +46,7 @@ func @optimize_quantize_less_equal_non_zero(%arg0: tensor<48x48x64xf32>, %arg1: } // CHECK-LABEL: @fuse_add_into_bconv2d -func @fuse_add_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: tensor<16xf32>, %arg3: none) -> tensor<256x30x30x16xf32> { +func.func @fuse_add_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: tensor<16xf32>, %arg3: none) -> tensor<256x30x30x16xf32> { %cst = arith.constant dense<1.5> : tensor<16xf32> %post_activation_bias = arith.constant dense<[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]> : tensor<16xf32> %0 = "lq.Bconv2d"(%arg0, %arg1, %arg2, %post_activation_bias, %arg3) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", padding = "SAME", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<16x3x3x3xf32>, tensor<16xf32>, tensor<16xf32>, none) -> tensor<256x30x30x16xf32> @@ -59,7 +59,7 @@ func @fuse_add_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3 } // CHECK-LABEL: @fuse_sub_into_bconv2d -func @fuse_sub_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: tensor<16xf32>, %arg3: none) -> tensor<256x30x30x16xf32> { +func.func @fuse_sub_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: tensor<16xf32>, %arg3: none) -> tensor<256x30x30x16xf32> { %cst = arith.constant dense<0.5> : tensor<16xf32> %post_activation_bias = arith.constant dense<[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]> : tensor<16xf32> %0 = "lq.Bconv2d"(%arg0, %arg1, %arg2, %post_activation_bias, %arg3) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", padding = "SAME", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<16x3x3x3xf32>, tensor<16xf32>, tensor<16xf32>, none) -> tensor<256x30x30x16xf32> @@ -72,7 +72,7 @@ func @fuse_sub_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3 } // CHECK-LABEL: @fuse_div_into_bconv2d -func @fuse_div_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: none) -> tensor<256x30x30x16xf32> { +func.func @fuse_div_into_bconv2d(%arg0: 
tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: none) -> tensor<256x30x30x16xf32> { %cst = arith.constant dense<0.5> : tensor<16xf32> %post_activation_bias = arith.constant dense<1.5> : tensor<16xf32> %post_activation_multiplier = arith.constant dense<[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]> : tensor<16xf32> @@ -87,7 +87,7 @@ func @fuse_div_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3 } // CHECK-LABEL: @fuse_mul_into_bconv2d -func @fuse_mul_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: none) -> tensor<256x30x30x16xf32> { +func.func @fuse_mul_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: none) -> tensor<256x30x30x16xf32> { %cst = arith.constant dense<2.0> : tensor<16xf32> %post_activation_bias = arith.constant dense<1.5> : tensor<16xf32> %post_activation_multiplier = arith.constant dense<[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]> : tensor<16xf32> @@ -102,7 +102,7 @@ func @fuse_mul_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3 } // CHECK-LABEL: @fuse_relu_into_bconv2d -func @fuse_relu_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: none) -> tensor<256x30x30x16xf32> { +func.func @fuse_relu_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: none) -> tensor<256x30x30x16xf32> { %post_activation_multiplier = arith.constant dense<1.0> : tensor<16xf32> %post_activation_bias = arith.constant dense<0.0> : tensor<16xf32> %0 = "lq.Bconv2d"(%arg0, %arg1, %post_activation_multiplier, %post_activation_bias, %arg2) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "VALID", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<16x3x3x3xf32>, tensor<16xf32>, tensor<16xf32>, none) -> tensor<256x30x30x16xf32> @@ -114,7 +114,7 @@ func @fuse_relu_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x } // CHECK-LABEL: @fuse_relu6_into_bconv2d -func @fuse_relu6_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: none) -> tensor<256x30x30x16xf32> { +func.func @fuse_relu6_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: none) -> tensor<256x30x30x16xf32> { %post_activation_multiplier = arith.constant dense<1.0> : tensor<16xf32> %post_activation_bias = arith.constant dense<0.0> : tensor<16xf32> %0 = "lq.Bconv2d"(%arg0, %arg1, %post_activation_multiplier, %post_activation_bias, %arg2) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "VALID", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<16x3x3x3xf32>, tensor<16xf32>, tensor<16xf32>, none) -> tensor<256x30x30x16xf32> @@ -126,7 +126,7 @@ func @fuse_relu6_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3 } // CHECK-LABEL: @fuse_relu1_into_bconv2d -func @fuse_relu1_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: none) -> tensor<256x30x30x16xf32> { +func.func @fuse_relu1_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: none) -> tensor<256x30x30x16xf32> { %post_activation_multiplier = arith.constant dense<1.0> : tensor<16xf32> %post_activation_bias = arith.constant 
dense<0.0> : tensor<16xf32> %0 = "lq.Bconv2d"(%arg0, %arg1, %post_activation_multiplier, %post_activation_bias, %arg2) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "VALID", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<16x3x3x3xf32>, tensor<16xf32>, tensor<16xf32>, none) -> tensor<256x30x30x16xf32> @@ -138,7 +138,7 @@ func @fuse_relu1_into_bconv2d(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3 } // CHECK-LABEL: @fuse_relu_into_bconv2d_padding_same_one -func @fuse_relu_into_bconv2d_padding_same_one(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: none) -> tensor<256x32x32x16xf32> { +func.func @fuse_relu_into_bconv2d_padding_same_one(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: none) -> tensor<256x32x32x16xf32> { %post_activation_multiplier = arith.constant dense<1.0> : tensor<16xf32> %post_activation_bias = arith.constant dense<0.0> : tensor<16xf32> %0 = "lq.Bconv2d"(%arg0, %arg1, %post_activation_multiplier, %post_activation_bias, %arg2) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 1 : i32, padding = "SAME", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<16x3x3x3xf32>, tensor<16xf32>, tensor<16xf32>, none) -> tensor<256x32x32x16xf32> @@ -150,7 +150,7 @@ func @fuse_relu_into_bconv2d_padding_same_one(%arg0: tensor<256x32x32x1xi32>, %a } // CHECK-LABEL: @do_not_fuse_relu_into_bconv2d_padding_same_zero -func @do_not_fuse_relu_into_bconv2d_padding_same_zero(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: none) -> tensor<256x32x32x16xf32> { +func.func @do_not_fuse_relu_into_bconv2d_padding_same_zero(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: none) -> tensor<256x32x32x16xf32> { %post_activation_multiplier = arith.constant dense<1.0> : tensor<16xf32> %post_activation_bias = arith.constant dense<0.0> : tensor<16xf32> %0 = "lq.Bconv2d"(%arg0, %arg1, %post_activation_multiplier, %post_activation_bias, %arg2) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "SAME", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<16x3x3x3xf32>, tensor<16xf32>, tensor<16xf32>, none) -> tensor<256x32x32x16xf32> @@ -163,7 +163,7 @@ func @do_not_fuse_relu_into_bconv2d_padding_same_zero(%arg0: tensor<256x32x32x1x } // CHECK-LABEL: @do_not_fuse_relu_into_bconv2d_no_post_activation_bias -func @do_not_fuse_relu_into_bconv2d_no_post_activation_bias(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: none) -> tensor<256x30x30x16xf32> { +func.func @do_not_fuse_relu_into_bconv2d_no_post_activation_bias(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: none) -> tensor<256x30x30x16xf32> { %post_activation_multiplier = arith.constant dense<1.0> : tensor<16xf32> %post_activation_bias = arith.constant dense<5.0> : tensor<16xf32> %0 = "lq.Bconv2d"(%arg0, %arg1, %post_activation_multiplier, %post_activation_bias, %arg2) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "VALID", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<16x3x3x3xf32>, 
tensor<16xf32>, tensor<16xf32>, none) -> tensor<256x30x30x16xf32> @@ -176,7 +176,7 @@ func @do_not_fuse_relu_into_bconv2d_no_post_activation_bias(%arg0: tensor<256x32 } // CHECK-LABEL: @do_not_fuse_relu_into_bconv2d_no_post_activation_multiplier -func @do_not_fuse_relu_into_bconv2d_no_post_activation_multiplier(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: none) -> tensor<256x30x30x16xf32> { +func.func @do_not_fuse_relu_into_bconv2d_no_post_activation_multiplier(%arg0: tensor<256x32x32x1xi32>, %arg1: tensor<16x3x3x3xf32>, %arg2: none) -> tensor<256x30x30x16xf32> { %post_activation_multiplier = arith.constant dense<0.8> : tensor<16xf32> %post_activation_bias = arith.constant dense<0.0> : tensor<16xf32> %0 = "lq.Bconv2d"(%arg0, %arg1, %post_activation_multiplier, %post_activation_bias, %arg2) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "VALID", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<16x3x3x3xf32>, tensor<16xf32>, tensor<16xf32>, none) -> tensor<256x30x30x16xf32> @@ -189,7 +189,7 @@ func @do_not_fuse_relu_into_bconv2d_no_post_activation_multiplier(%arg0: tensor< } // CHECK-LABEL: @target_specific_reorder_maxpool_2d_quantize -func @target_specific_reorder_maxpool_2d_quantize(%arg0: tensor<256x32x32x65xf32>) -> tensor<256x16x8x3xi32> { +func.func @target_specific_reorder_maxpool_2d_quantize(%arg0: tensor<256x32x32x65xf32>) -> tensor<256x16x8x3xi32> { %0 = "tfl.max_pool_2d"(%arg0) {filter_height = 3 : i32, filter_width = 2 : i32, fused_activation_function = "NONE", padding = "SAME", stride_h = 2 : i32, stride_w = 4 : i32} : (tensor<256x32x32x65xf32>) -> tensor<256x16x8x65xf32> %1 = "lq.Quantize"(%0) : (tensor<256x16x8x65xf32>) -> tensor<256x16x8x3xi32> return %1 : tensor<256x16x8x3xi32> @@ -204,7 +204,7 @@ func @target_specific_reorder_maxpool_2d_quantize(%arg0: tensor<256x32x32x65xf32 } // CHECK-LABEL: @do_not_reorder_maxpool_2d_quantize_multiple_uses -func @do_not_reorder_maxpool_2d_quantize_multiple_uses(%arg0: tensor<256x32x32x65xf32>) -> (tensor<256x16x8x65xf32>, tensor<256x16x8x3xi32>) { +func.func @do_not_reorder_maxpool_2d_quantize_multiple_uses(%arg0: tensor<256x32x32x65xf32>) -> (tensor<256x16x8x65xf32>, tensor<256x16x8x3xi32>) { %0 = "tfl.max_pool_2d"(%arg0) {filter_height = 3 : i32, filter_width = 2 : i32, fused_activation_function = "NONE", padding = "SAME", stride_h = 2 : i32, stride_w = 4 : i32} : (tensor<256x32x32x65xf32>) -> tensor<256x16x8x65xf32> %1 = "lq.Quantize"(%0) : (tensor<256x16x8x65xf32>) -> tensor<256x16x8x3xi32> return %0, %1 : tensor<256x16x8x65xf32>, tensor<256x16x8x3xi32> @@ -215,11 +215,11 @@ func @do_not_reorder_maxpool_2d_quantize_multiple_uses(%arg0: tensor<256x32x32x6 } // CHECK-LABEL: @bitpack_activation_thresholds_with_negative_post_multipliers -func @bitpack_activation_thresholds_with_negative_post_multipliers(%arg0: tensor<256x32x32x1xi32>) -> tensor<256x32x32x1xi32> { +func.func @bitpack_activation_thresholds_with_negative_post_multipliers(%arg0: tensor<256x32x32x1xi32>) -> tensor<256x32x32x1xi32> { %filter = arith.constant dense<1.0> : tensor<8x2x2x1xf32> %post_activation_multiplier = arith.constant dense<[-4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0]> : tensor<8xf32> %post_activation_bias = arith.constant dense<[-10.0, 8.0, 0.4, 1.0, -0.01, 0.5, -1.0, 2.71]> : tensor<8xf32> - %cst = constant unit + %cst = "tfl.no_value"() {value = unit} : () -> none %0 = "lq.Bconv2d"(%arg0, 
%filter, %post_activation_multiplier, %post_activation_bias, %cst) {channels_in = 1 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 1 : i32, padding = "SAME", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<8x2x2x1xf32>, tensor<8xf32>, tensor<8xf32>, none) -> tensor<256x32x32x8xf32> %1 = "lq.Quantize"(%0) : (tensor<256x32x32x8xf32>) -> tensor<256x32x32x1xi32> return %1 : tensor<256x32x32x1xi32> @@ -231,69 +231,69 @@ func @bitpack_activation_thresholds_with_negative_post_multipliers(%arg0: tensor // CHECK: %cst = arith.constant {{dense<\[\[\[\[-1.000000e\+00\], \[-1.000000e\+00\]\], \[\[-1.000000e\+00\], \[-1.000000e\+00\]\]\], \[\[\[-1.000000e\+00\], \[-1.000000e\+00\]\], \[\[-1.000000e\+00\], \[-1.000000e\+00\]\]\], \[\[\[-1.000000e\+00\], \[-1.000000e\+00\]\], \[\[-1.000000e\+00\], \[-1.000000e\+00\]\]\], \[\[\[-1.000000e\+00\], \[-1.000000e\+00\]\], \[\[-1.000000e\+00\], \[-1.000000e\+00\]\]\], \[\[\[1.000000e\+00\], \[1.000000e\+00\]\], \[\[1.000000e\+00\], \[1.000000e\+00\]\]\], \[\[\[1.000000e\+00\], \[1.000000e\+00\]\], \[\[1.000000e\+00\], \[1.000000e\+00\]\]\], \[\[\[1.000000e\+00\], \[1.000000e\+00\]\], \[\[1.000000e\+00\], \[1.000000e\+00\]\]\], \[\[\[1.000000e\+00\], \[1.000000e\+00\]\], \[\[1.000000e\+00\], \[1.000000e\+00\]\]\]\]>}} : tensor<8x2x2x1xf32> // The `none` value that will replace the post-multiplier and post-bias. - // CHECK-NEXT: %cst_0 = constant unit + // CHECK-NEXT: %0 = "tfl.no_value"() {value} : () -> none // Verify correct thresholds. These have been manually computed. - // CHECK-NEXT: %cst_1 = arith.constant dense<[0, 3, 2, 2, -2147483648, 2, 1, 2]> : tensor<8xi32> + // CHECK-NEXT: %cst_0 = arith.constant dense<[0, 3, 2, 2, -2147483648, 2, 1, 2]> : tensor<8xi32> - // CHECK-NEXT: %0 = "lq.Bconv2d"(%arg0, %cst, %cst_0, %cst_0, %cst_1) {channels_in = 1 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 1 : i32, padding = "SAME", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<8x2x2x1xf32>, none, none, tensor<8xi32>) -> tensor<256x32x32x1xi32> - // CHECK-NEXT: return %0 + // CHECK-NEXT: %1 = "lq.Bconv2d"(%arg0, %cst, %0, %0, %cst_0) {channels_in = 1 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 1 : i32, padding = "SAME", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<8x2x2x1xf32>, none, none, tensor<8xi32>) -> tensor<256x32x32x1xi32> + // CHECK-NEXT: return %1 } // CHECK-LABEL: @bitpack_activations_valid_padding -func @bitpack_activations_valid_padding(%arg0: tensor<256x32x32x1xi32>) -> tensor<256x30x30x3xi32> { +func.func @bitpack_activations_valid_padding(%arg0: tensor<256x32x32x1xi32>) -> tensor<256x30x30x3xi32> { %filter = arith.constant dense<1.0> : tensor<65x3x3x3xf32> %post_activation_multiplier = arith.constant dense<0.5> : tensor<65xf32> %post_activation_bias = arith.constant dense<-1.0> : tensor<65xf32> - %cst = constant unit - %0 = "lq.Bconv2d"(%arg0, %filter, %post_activation_multiplier, %post_activation_bias, %cst) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "VALID", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<65x3x3x3xf32>, tensor<65xf32>, tensor<65xf32>, none) -> 
tensor<256x30x30x65xf32> - %1 = "lq.Quantize"(%0) : (tensor<256x30x30x65xf32>) -> tensor<256x30x30x3xi32> - return %1 : tensor<256x30x30x3xi32> + %0 = "tfl.no_value"() {value = unit} : () -> none + %1 = "lq.Bconv2d"(%arg0, %filter, %post_activation_multiplier, %post_activation_bias, %0) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "VALID", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<65x3x3x3xf32>, tensor<65xf32>, tensor<65xf32>, none) -> tensor<256x30x30x65xf32> + %2 = "lq.Quantize"(%1) : (tensor<256x30x30x65xf32>) -> tensor<256x30x30x3xi32> + return %2 : tensor<256x30x30x3xi32> - // CHECK: %0 = "lq.Bconv2d"(%arg0, %cst, %cst_0, %cst_0, %cst_1) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "VALID", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<65x3x3x3xf32>, none, none, tensor<65xi32>) -> tensor<256x30x30x3xi32> - // CHECK-NEXT: return %0 + // CHECK: %1 = "lq.Bconv2d"(%arg0, %cst, %0, %0, %cst_0) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "VALID", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<65x3x3x3xf32>, none, none, tensor<65xi32>) -> tensor<256x30x30x3xi32> + // CHECK-NEXT: return %1 } // CHECK-LABEL: @bitpack_activations_same_one_padding -func @bitpack_activations_same_one_padding(%arg0: tensor<256x32x32x1xi32>) -> tensor<256x32x32x3xi32> { +func.func @bitpack_activations_same_one_padding(%arg0: tensor<256x32x32x1xi32>) -> tensor<256x32x32x3xi32> { %filter = arith.constant dense<1.0> : tensor<65x3x3x3xf32> %post_activation_multiplier = arith.constant dense<0.5> : tensor<65xf32> %post_activation_bias = arith.constant dense<-1.0> : tensor<65xf32> - %cst = constant unit - %0 = "lq.Bconv2d"(%arg0, %filter, %post_activation_multiplier, %post_activation_bias, %cst) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 1 : i32, padding = "SAME", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<65x3x3x3xf32>, tensor<65xf32>, tensor<65xf32>, none) -> tensor<256x32x32x65xf32> - %1 = "lq.Quantize"(%0) : (tensor<256x32x32x65xf32>) -> tensor<256x32x32x3xi32> - return %1 : tensor<256x32x32x3xi32> + %0 = "tfl.no_value"() {value = unit} : () -> none + %1 = "lq.Bconv2d"(%arg0, %filter, %post_activation_multiplier, %post_activation_bias, %0) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 1 : i32, padding = "SAME", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<65x3x3x3xf32>, tensor<65xf32>, tensor<65xf32>, none) -> tensor<256x32x32x65xf32> + %2 = "lq.Quantize"(%1) : (tensor<256x32x32x65xf32>) -> tensor<256x32x32x3xi32> + return %2 : tensor<256x32x32x3xi32> - // CHECK: %0 = "lq.Bconv2d"(%arg0, %cst, %cst_0, %cst_0, %cst_1) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 1 : i32, padding = "SAME", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<65x3x3x3xf32>, none, none, 
tensor<65xi32>) -> tensor<256x32x32x3xi32> - // CHECK-NEXT: return %0 + // CHECK: %1 = "lq.Bconv2d"(%arg0, %cst, %0, %0, %cst_0) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 1 : i32, padding = "SAME", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<65x3x3x3xf32>, none, none, tensor<65xi32>) -> tensor<256x32x32x3xi32> + // CHECK-NEXT: return %1 } // CHECK-LABEL: @do_not_bitpack_activations_same_zero_padding -func @do_not_bitpack_activations_same_zero_padding(%arg0: tensor<256x32x32x1xi32>) -> tensor<256x32x32x3xi32> { +func.func @do_not_bitpack_activations_same_zero_padding(%arg0: tensor<256x32x32x1xi32>) -> tensor<256x32x32x3xi32> { %filter = arith.constant dense<1.0> : tensor<65x3x3x3xf32> %post_activation_multiplier = arith.constant dense<0.5> : tensor<65xf32> %post_activation_bias = arith.constant dense<-1.0> : tensor<65xf32> - %cst = constant unit - %0 = "lq.Bconv2d"(%arg0, %filter, %post_activation_multiplier, %post_activation_bias, %cst) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "SAME", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<65x3x3x3xf32>, tensor<65xf32>, tensor<65xf32>, none) -> tensor<256x32x32x65xf32> - %1 = "lq.Quantize"(%0) : (tensor<256x32x32x65xf32>) -> tensor<256x32x32x3xi32> - return %1 : tensor<256x32x32x3xi32> - - // CHECK: %0 = "lq.Bconv2d"(%arg0, %cst, %cst_0, %cst_1, %cst_2) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "SAME", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<65x3x3x3xf32>, tensor<65xf32>, tensor<65xf32>, none) -> tensor<256x32x32x65xf32> - // CHECK-NEXT: %1 = "lq.Quantize"(%0) : (tensor<256x32x32x65xf32>) -> tensor<256x32x32x3xi32> - // CHECK-NEXT: return %1 + %0 = "tfl.no_value"() {value = unit} : () -> none + %1 = "lq.Bconv2d"(%arg0, %filter, %post_activation_multiplier, %post_activation_bias, %0) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "SAME", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<65x3x3x3xf32>, tensor<65xf32>, tensor<65xf32>, none) -> tensor<256x32x32x65xf32> + %2 = "lq.Quantize"(%1) : (tensor<256x32x32x65xf32>) -> tensor<256x32x32x3xi32> + return %2 : tensor<256x32x32x3xi32> + + // CHECK: %1 = "lq.Bconv2d"(%arg0, %cst, %cst_0, %cst_1, %0) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "SAME", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<65x3x3x3xf32>, tensor<65xf32>, tensor<65xf32>, none) -> tensor<256x32x32x65xf32> + // CHECK-NEXT: %2 = "lq.Quantize"(%1) : (tensor<256x32x32x65xf32>) -> tensor<256x32x32x3xi32> + // CHECK-NEXT: return %2 } // CHECK-LABEL: @do_not_bitpack_activations_multiple_uses -func @do_not_bitpack_activations_multiple_uses(%arg0: tensor<256x32x32x1xi32>) -> (tensor<256x30x30x65xf32>, tensor<256x30x30x3xi32>) { +func.func @do_not_bitpack_activations_multiple_uses(%arg0: tensor<256x32x32x1xi32>) -> (tensor<256x30x30x65xf32>, tensor<256x30x30x3xi32>) { %filter = arith.constant dense<1.0> 
: tensor<65x3x3x3xf32> %post_activation_multiplier = arith.constant dense<0.5> : tensor<65xf32> %post_activation_bias = arith.constant dense<-1.0> : tensor<65xf32> - %cst = constant unit - %0 = "lq.Bconv2d"(%arg0, %filter, %post_activation_multiplier, %post_activation_bias, %cst) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "VALID", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<65x3x3x3xf32>, tensor<65xf32>, tensor<65xf32>, none) -> tensor<256x30x30x65xf32> - %1 = "lq.Quantize"(%0) : (tensor<256x30x30x65xf32>) -> tensor<256x30x30x3xi32> - return %0, %1: tensor<256x30x30x65xf32>, tensor<256x30x30x3xi32> - - // CHECK: %0 = "lq.Bconv2d"(%arg0, %cst, %cst_0, %cst_1, %cst_2) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "VALID", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<65x3x3x3xf32>, tensor<65xf32>, tensor<65xf32>, none) -> tensor<256x30x30x65xf32> - // CHECK-NEXT: %1 = "lq.Quantize"(%0) : (tensor<256x30x30x65xf32>) -> tensor<256x30x30x3xi32> - // CHECK-NEXT: return %0, %1 + %0 = "tfl.no_value"() {value = unit} : () -> none + %1 = "lq.Bconv2d"(%arg0, %filter, %post_activation_multiplier, %post_activation_bias, %0) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "VALID", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<65x3x3x3xf32>, tensor<65xf32>, tensor<65xf32>, none) -> tensor<256x30x30x65xf32> + %2 = "lq.Quantize"(%1) : (tensor<256x30x30x65xf32>) -> tensor<256x30x30x3xi32> + return %1, %2: tensor<256x30x30x65xf32>, tensor<256x30x30x3xi32> + + // CHECK: %1 = "lq.Bconv2d"(%arg0, %cst, %cst_0, %cst_1, %0) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "VALID", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<256x32x32x1xi32>, tensor<65x3x3x3xf32>, tensor<65xf32>, tensor<65xf32>, none) -> tensor<256x30x30x65xf32> + // CHECK-NEXT: %2 = "lq.Quantize"(%1) : (tensor<256x30x30x65xf32>) -> tensor<256x30x30x3xi32> + // CHECK-NEXT: return %1, %2 } diff --git a/larq_compute_engine/mlir/tests/prepare-tf.mlir b/larq_compute_engine/mlir/tests/prepare-tf.mlir index 69490cef6..22fa7194b 100644 --- a/larq_compute_engine/mlir/tests/prepare-tf.mlir +++ b/larq_compute_engine/mlir/tests/prepare-tf.mlir @@ -2,7 +2,7 @@ // RUN: lce-tf-opt %s -tfl-prepare-lce=target=xcore -verify-diagnostics | FileCheck %s --check-prefixes CHECK,CHECK-XCORE // CHECK-LABEL: @fuse_bsign_tf_where -func @fuse_bsign_tf_where(%arg0: tensor<8x16xi1>) -> tensor<8x16xf32> { +func.func @fuse_bsign_tf_where(%arg0: tensor<8x16xi1>) -> tensor<8x16xf32> { %cst_l = arith.constant dense<1.0> : tensor<8x16xf32> %cst_r = arith.constant dense<-1.0> : tensor<8x16xf32> %0 = "tf.SelectV2"(%arg0, %cst_l, %cst_r) : (tensor<8x16xi1>, tensor<8x16xf32>, tensor<8x16xf32>) -> tensor<8x16xf32> @@ -14,7 +14,7 @@ func @fuse_bsign_tf_where(%arg0: tensor<8x16xi1>) -> tensor<8x16xf32> { } // CHECK-LABEL: @fuse_bsign_tf_where_inverted -func @fuse_bsign_tf_where_inverted(%arg0: tensor<8x16xi1>) -> tensor<8x16xf32> { +func.func @fuse_bsign_tf_where_inverted(%arg0: tensor<8x16xi1>) -> tensor<8x16xf32> { 
   %cst_l = arith.constant dense<-1.0> : tensor<8x16xf32>
   %cst_r = arith.constant dense<1.0> : tensor<8x16xf32>
   %0 = "tf.SelectV2"(%arg0, %cst_l, %cst_r) : (tensor<8x16xi1>, tensor<8x16xf32>, tensor<8x16xf32>) -> tensor<8x16xf32>
@@ -27,7 +27,7 @@ func @fuse_bsign_tf_where_inverted(%arg0: tensor<8x16xi1>) -> tensor<8x16xf32> {
 }
 
 // CHECK-LABEL: @fuse_bsign_tf_where_broadcast_cond
-func @fuse_bsign_tf_where_broadcast_cond(%arg0: tensor<8x1xi1>) -> tensor<8x16xf32> {
+func.func @fuse_bsign_tf_where_broadcast_cond(%arg0: tensor<8x1xi1>) -> tensor<8x16xf32> {
   %cst_l = arith.constant dense<1.0> : tensor<8x16xf32>
   %cst_r = arith.constant dense<-1.0> : tensor<8x16xf32>
   %0 = "tf.SelectV2"(%arg0, %cst_l, %cst_r) : (tensor<8x1xi1>, tensor<8x16xf32>, tensor<8x16xf32>) -> tensor<8x16xf32>
@@ -41,7 +41,7 @@ func @fuse_bsign_tf_where_broadcast_cond(%arg0: tensor<8x1xi1>) -> tensor<8x16xf
 }
 
 // CHECK-LABEL: @fuse_bsign_tf_where_broadcast_lhs_rhs
-func @fuse_bsign_tf_where_broadcast_lhs_rhs(%arg0: tensor<8x16xi1>) -> tensor<8x16xf32> {
+func.func @fuse_bsign_tf_where_broadcast_lhs_rhs(%arg0: tensor<8x16xi1>) -> tensor<8x16xf32> {
   %cst_l = arith.constant dense<1.0> : tensor<f32>
   %cst_r = arith.constant dense<-1.0> : tensor<8x1xf32>
   %0 = "tf.SelectV2"(%arg0, %cst_l, %cst_r) : (tensor<8x16xi1>, tensor<f32>, tensor<8x1xf32>) -> tensor<8x16xf32>
@@ -53,7 +53,7 @@ func @fuse_bsign_tf_where_broadcast_lhs_rhs(%arg0: tensor<8x16xi1>) -> tensor<8x
 }
 
 // CHECK-LABEL: @fuse_bsign_tf_where_select_v1_op
-func @fuse_bsign_tf_where_select_v1_op(%arg0: tensor<8x16xi1>) -> tensor<8x16xf32> {
+func.func @fuse_bsign_tf_where_select_v1_op(%arg0: tensor<8x16xi1>) -> tensor<8x16xf32> {
   %cst_l = arith.constant dense<1.0> : tensor<8x16xf32>
   %cst_r = arith.constant dense<-1.0> : tensor<8x16xf32>
   %0 = "tf.Select"(%arg0, %cst_l, %cst_r) : (tensor<8x16xi1>, tensor<8x16xf32>, tensor<8x16xf32>) -> tensor<8x16xf32>
@@ -65,7 +65,7 @@ func @fuse_bsign_tf_where_select_v1_op(%arg0: tensor<8x16xi1>) -> tensor<8x16xf3
 }
 
 // CHECK-LABEL: @fuse_bsign_legacy_tf_sign
-func @fuse_bsign_legacy_tf_sign(%arg0: tensor<8x16xf32>) -> tensor<8x16xf32> {
+func.func @fuse_bsign_legacy_tf_sign(%arg0: tensor<8x16xf32>) -> tensor<8x16xf32> {
   %0 = "tf.Sign"(%arg0) : (tensor<8x16xf32>) -> tensor<8x16xf32>
   %cst = arith.constant dense<0.1> : tensor<f32>
   %2 = "tf.AddV2"(%0, %cst) : (tensor<8x16xf32>, tensor<f32>) -> tensor<8x16xf32>
@@ -78,7 +78,7 @@ func @fuse_bsign_legacy_tf_sign(%arg0: tensor<8x16xf32>) -> tensor<8x16xf32> {
 }
 
 // CHECK-LABEL: @fuse_bconv2d_valid_padding
-func @fuse_bconv2d_valid_padding(%arg0: tensor<1x112x112x1xi32>) -> tensor<1x112x110x2xf32> {
+func.func @fuse_bconv2d_valid_padding(%arg0: tensor<1x112x112x1xi32>) -> tensor<1x112x110x2xf32> {
   %cst = "tf.Const"() { value = dense<[[[[1.0, -1.0], [1.0, 1.0]], [[-1.0, 1.0], [-1.0, 1.0]], [[-1.0, -1.0], [-1.0, 1.0]]]]> : tensor<1x3x2x2xf32> } : () -> tensor<1x3x2x2xf32>
   %0 = "lq.Dequantize"(%arg0) : (tensor<1x112x112x1xi32>) -> tensor<1x112x112x2xf32>
   %1 = "tf.Conv2D"(%0, %cst) {padding = "VALID", strides = [1, 1, 1, 1]} : (tensor<1x112x112x2xf32>, tensor<1x3x2x2xf32>) -> tensor<1x112x110x2xf32>
@@ -87,14 +87,14 @@ func @fuse_bconv2d_valid_padding(%arg0: tensor<1x112x112x1xi32>) -> tensor<1x112
   // CHECK: %cst = arith.constant
   // CHECK: %[[post_activation_multiplier:.*]] = arith.constant dense<1.000000e+00> : tensor<2xf32>
   // CHECK: %[[post_activation_bias:.*]] = arith.constant dense<0.000000e+00> : tensor<2xf32>
-  // CHECK: %[[output_threshold:.*]] = constant unit
+  // CHECK: %[[output_threshold:.*]] = "tfl.no_value"() {value} : () -> none
   // CHECK: %[[transpose:.*]] = "tf.Transpose"
   // CHECK-NEXT: %[[conv:.*]] = "lq.Bconv2d"(%arg0, %[[transpose]], %[[post_activation_multiplier]], %[[post_activation_bias]], %[[output_threshold:.*]]) {channels_in = 2 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "VALID", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<1x112x112x1xi32>, tensor<2x1x3x2xf32>, tensor<2xf32>, tensor<2xf32>, none) -> tensor<1x112x110x2xf32>
   // CHECK-NEXT: return %[[conv]]
 }
 
 // CHECK-LABEL: @target_specific_fuse_bconv2d_same_zero_padding
-func @target_specific_fuse_bconv2d_same_zero_padding(%arg0: tensor<1x112x112x1xi32>) -> tensor<1x112x112x2xf32> {
+func.func @target_specific_fuse_bconv2d_same_zero_padding(%arg0: tensor<1x112x112x1xi32>) -> tensor<1x112x112x2xf32> {
   %cst = "tf.Const"() { value = dense<[[[[1.0, -1.0], [1.0, 1.0]], [[-1.0, 1.0], [-1.0, 1.0]]]]> : tensor<1x2x2x2xf32> } : () -> tensor<1x2x2x2xf32>
   %0 = "lq.Dequantize"(%arg0) : (tensor<1x112x112x1xi32>) -> tensor<1x112x112x2xf32>
   %1 = "tf.Conv2D"(%0, %cst) {padding = "SAME", strides = [1, 1, 1, 1]} : (tensor<1x112x112x2xf32>, tensor<1x2x2x2xf32>) -> tensor<1x112x112x2xf32>
@@ -103,7 +103,7 @@ func @target_specific_fuse_bconv2d_same_zero_padding(%arg0: tensor<1x112x112x1xi
   // CHECK-ARM: %cst = arith.constant
   // CHECK-ARM: %[[post_activation_multiplier:.*]] = arith.constant dense<1.000000e+00> : tensor<2xf32>
   // CHECK-ARM: %[[post_activation_bias:.*]] = arith.constant dense<0.000000e+00> : tensor<2xf32>
-  // CHECK-ARM: %[[output_threshold:.*]] = constant unit
+  // CHECK-ARM: %[[output_threshold:.*]] = "tfl.no_value"() {value} : () -> none
   // CHECK-ARM: %[[transpose:.*]] = "tf.Transpose"
   // CHECK-ARM-NEXT: %[[conv:.*]] = "lq.Bconv2d"(%arg0, %[[transpose]], %[[post_activation_multiplier]], %[[post_activation_bias]], %[[output_threshold:.*]]) {channels_in = 2 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "SAME", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<1x112x112x1xi32>, tensor<2x1x2x2xf32>, tensor<2xf32>, tensor<2xf32>, none) -> tensor<1x112x112x2xf32>
   // CHECK-ARM-NEXT: return %[[conv]]
@@ -113,7 +113,7 @@ func @target_specific_fuse_bconv2d_same_zero_padding(%arg0: tensor<1x112x112x1xi
 }
 
 // CHECK-LABEL: @fuse_bconv2d_grouped_convolution
-func @fuse_bconv2d_grouped_convolution(%arg0: tensor<1x112x112x4xi32>) -> tensor<1x110x110x16xf32> {
+func.func @fuse_bconv2d_grouped_convolution(%arg0: tensor<1x112x112x4xi32>) -> tensor<1x110x110x16xf32> {
   // A 3x3 filter with 128 input channels (64 per-group) and 16 output channels (8 per-group).
   %cst = "tf.Const"() { value = dense<1.0> : tensor<3x3x64x16xf32>} : () -> tensor<3x3x64x16xf32>
   %0 = "lq.Dequantize"(%arg0) : (tensor<1x112x112x4xi32>) -> tensor<1x112x112x128xf32>
@@ -123,14 +123,14 @@ func @fuse_bconv2d_grouped_convolution(%arg0: tensor<1x112x112x4xi32>) -> tensor
   // CHECK: %cst = arith.constant
   // CHECK: %[[post_activation_multiplier:.*]] = arith.constant dense<1.000000e+00> : tensor<16xf32>
   // CHECK: %[[post_activation_bias:.*]] = arith.constant dense<0.000000e+00> : tensor<16xf32>
-  // CHECK: %[[output_threshold:.*]] = constant unit
+  // CHECK: %[[output_threshold:.*]] = "tfl.no_value"() {value} : () -> none
   // CHECK: %[[transpose:.*]] = "tf.Transpose"
   // CHECK-NEXT: %[[conv:.*]] = "lq.Bconv2d"(%arg0, %[[transpose]], %[[post_activation_multiplier]], %[[post_activation_bias]], %[[output_threshold:.*]]) {channels_in = 128 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "VALID", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<1x112x112x4xi32>, tensor<16x3x3x64xf32>, tensor<16xf32>, tensor<16xf32>, none) -> tensor<1x110x110x16xf32>
   // CHECK-NEXT: return %[[conv]]
 }
 
 // CHECK-LABEL: @do_not_fuse_bconv2d_grouped_convolution_group_size_not_mul_32
-func @do_not_fuse_bconv2d_grouped_convolution_group_size_not_mul_32(%arg0: tensor<1x56x56x4xi32>) -> tensor<1x54x54x128xf32> {
+func.func @do_not_fuse_bconv2d_grouped_convolution_group_size_not_mul_32(%arg0: tensor<1x56x56x4xi32>) -> tensor<1x54x54x128xf32> {
   // A 3x3 filter with 128 input channels (4 per-group) and 128 output channels
   // (4 per-group). We expect an error to be raised:
   //
@@ -142,7 +142,7 @@ func @do_not_fuse_bconv2d_grouped_convolution_group_size_not_mul_32(%arg0: tenso
 }
 
 // CHECK-LABEL: @fuse_scaled_bconv2d
-func @fuse_scaled_bconv2d(%arg0: tensor<1x112x112x1xi32>) -> tensor<1x112x110x2xf32> {
+func.func @fuse_scaled_bconv2d(%arg0: tensor<1x112x112x1xi32>) -> tensor<1x112x110x2xf32> {
   %cst = arith.constant dense<[[[[0.3, -0.1], [0.3, 0.1]], [[-0.3, 0.1], [-0.3, 0.1]], [[-0.3, -0.1], [0.3, 0.1]]]]> : tensor<1x3x2x2xf32>
   %0 = "lq.Dequantize"(%arg0) : (tensor<1x112x112x1xi32>) -> tensor<1x112x112x2xf32>
   %1 = "tf.Conv2D"(%0, %cst) {padding = "VALID", strides = [1, 1, 1, 1]} : (tensor<1x112x112x2xf32>, tensor<1x3x2x2xf32>) -> tensor<1x112x110x2xf32>
@@ -151,18 +151,18 @@ func @fuse_scaled_bconv2d(%arg0: tensor<1x112x112x1xi32>) -> tensor<1x112x110x2x
   // CHECK: %cst = arith.constant
   // CHECK: %[[post_activation_multiplier:.*]] = arith.constant dense<[3.000000e-01, 1.000000e-01]> : tensor<2xf32>
   // CHECK: %[[post_activation_bias:.*]] = arith.constant dense<0.000000e+00> : tensor<2xf32>
-  // CHECK: %[[output_threshold:.*]] = constant unit
+  // CHECK: %[[output_threshold:.*]] = "tfl.no_value"() {value} : () -> none
   // CHECK: %[[transpose:.*]] = "tf.Transpose"
   // CHECK-NEXT: %[[conv:.*]] = "lq.Bconv2d"(%arg0, %[[transpose]], %[[post_activation_multiplier]], %[[post_activation_bias]], %[[output_threshold:.*]]) {channels_in = 2 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "VALID", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<1x112x112x1xi32>, tensor<2x1x3x2xf32>, tensor<2xf32>, tensor<2xf32>, none) -> tensor<1x112x110x2xf32>
   // CHECK-NEXT: return %[[conv]]
 }
 
 // CHECK-LABEL: @fuse_dilated_bconv
-func @fuse_dilated_bconv(%arg0: tensor<1x128x128x1xi32>) -> tensor<1x128x128x8xf32> {
+func.func @fuse_dilated_bconv(%arg0: tensor<1x128x128x1xi32>) -> tensor<1x128x128x8xf32> {
   %cst = arith.constant dense<[2, 2]> : tensor<2xi32>
   %cst_0 = arith.constant dense<4> : tensor<2x2xi32>
   %cst_1 = arith.constant dense<1.0> : tensor<5x5x3x8xf32>
-  %cst_2 = constant unit
+  %cst_2 = "tfl.no_value"() {value = unit} : () -> none
   %0 = "lq.Dequantize"(%arg0) : (tensor<1x128x128x1xi32>) -> tensor<1x128x128x3xf32>
   %1 = "tf.SpaceToBatchND"(%0, %cst, %cst_0) : (tensor<1x128x128x3xf32>, tensor<2xi32>, tensor<2x2xi32>) -> tensor<4x68x68x3xf32>
   %2 = "tf.Conv2D"(%1, %cst_1) {padding = "VALID", strides = [1, 1, 1, 1]} : (tensor<4x68x68x3xf32>, tensor<5x5x3x8xf32>) -> tensor<4x64x64x8xf32>
@@ -171,14 +171,14 @@ func @fuse_dilated_bconv(%arg0: tensor<1x128x128x1xi32>) -> tensor<1x128x128x8xf
   // CHECK: %[[post_activation_multiplier:.*]] = arith.constant dense<1.000000e+00> : tensor<8xf32>
   // CHECK: %[[post_activation_bias:.*]] = arith.constant dense<0.000000e+00> : tensor<8xf32>
-  // CHECK: %[[output_threshold:.*]] = constant unit
+  // CHECK: %[[output_threshold:.*]] = "tfl.no_value"() {value} : () -> none
   // CHECK: %[[transpose:.*]] = "tf.Transpose"
   // CHECK-NEXT: %[[conv:.*]] = "lq.Bconv2d"(%arg0, %[[transpose]], %[[post_activation_multiplier]], %[[post_activation_bias]], %[[output_threshold:.*]]) {channels_in = 3 : i32, dilation_height_factor = 2 : i32, dilation_width_factor = 2 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "VALID", stride_height = 1 : i32, stride_width = 1 : i32} : (tensor<1x128x128x1xi32>, tensor<8x5x5x3xf32>, tensor<8xf32>, tensor<8xf32>, none) -> tensor<1x128x128x8xf32>
   // CHECK-NEXT: return %[[conv]] : tensor<1x128x128x8xf32>
 }
 
 // CHECK-LABEL: @do_not_fuse_bconv2d_non_binary_weights
-func @do_not_fuse_bconv2d_non_binary_weights(%arg0: tensor<1x112x112x1xi32>) -> tensor<1x112x112x2xf32> {
+func.func @do_not_fuse_bconv2d_non_binary_weights(%arg0: tensor<1x112x112x1xi32>) -> tensor<1x112x112x2xf32> {
   %cst = arith.constant dense<[[[[3.0, -1.0], [0.1, 1.0]], [[-1.0, 1.0], [-1.0, 1.0]]]]> : tensor<1x2x2x2xf32>
   %0 = "lq.Dequantize"(%arg0) : (tensor<1x112x112x1xi32>) -> tensor<1x112x112x2xf32>
   %1 = "tf.Conv2D"(%0, %cst) {padding = "SAME", strides = [1, 1, 1, 1]} : (tensor<1x112x112x2xf32>, tensor<1x2x2x2xf32>) -> tensor<1x112x112x2xf32>
@@ -191,7 +191,7 @@ func @do_not_fuse_bconv2d_non_binary_weights(%arg0: tensor<1x112x112x1xi32>) ->
 }
 
 // CHECK-LABEL: @do_not_fuse_bconv2d_zero_weight
-func @do_not_fuse_bconv2d_zero_weight(%arg0: tensor<1x112x112x1xi32>) -> tensor<1x112x112x2xf32> {
+func.func @do_not_fuse_bconv2d_zero_weight(%arg0: tensor<1x112x112x1xi32>) -> tensor<1x112x112x2xf32> {
   %cst = arith.constant dense<0.0> : tensor<1x2x2x2xf32>
   %0 = "lq.Dequantize"(%arg0) : (tensor<1x112x112x1xi32>) -> tensor<1x112x112x2xf32>
   %1 = "tf.Conv2D"(%0, %cst) {padding = "SAME", strides = [1, 1, 1, 1]} : (tensor<1x112x112x2xf32>, tensor<1x2x2x2xf32>) -> tensor<1x112x112x2xf32>
@@ -204,7 +204,7 @@ func @do_not_fuse_bconv2d_zero_weight(%arg0: tensor<1x112x112x1xi32>) -> tensor<
 }
 
 // CHECK-LABEL: @fuse_bconv2d_same_one_padding
-func @fuse_bconv2d_same_one_padding(%arg0: tensor<256x32x32x1xi32>) -> tensor<256x16x16x16xf32> {
+func.func @fuse_bconv2d_same_one_padding(%arg0: tensor<256x32x32x1xi32>) -> tensor<256x16x16x16xf32> {
   %cst = arith.constant dense<1.0> : tensor<3x3x3x16xf32>
   %cst0 = arith.constant dense<1.0> : tensor<f32>
   %cst1 = arith.constant dense<[[0, 0], [1, 1], [1, 1], [0, 0]]> : tensor<4x2xi32>
@@ -215,13 +215,13 @@ func @fuse_bconv2d_same_one_padding(%arg0:
tensor<256x32x32x1xi32>) -> tensor<25 // CHECK: %[[CST1:.*]] = arith.constant dense<1.000000e+00> : tensor<16xf32> // CHECK: %[[CST2:.*]] = arith.constant dense<0.000000e+00> : tensor<16xf32> - // CHECK: %[[CST3:.*]] = constant unit + // CHECK: %[[CST3:.*]] = "tfl.no_value"() {value} : () -> none // CHECK: %[[TRP:.*]] = "tf.Transpose" // CHECK: %[[CONV:.*]] = "lq.Bconv2d"(%arg0, %[[TRP]], %[[CST1]], %[[CST2]], %[[CST3:.*]]) {channels_in = 3 : i32, dilation_height_factor = 1 : i32, dilation_width_factor = 1 : i32, fused_activation_function = "NONE", pad_values = 1 : i32, padding = "SAME", stride_height = 2 : i32, stride_width = 2 : i32} : (tensor<256x32x32x1xi32>, tensor<16x3x3x3xf32>, tensor<16xf32>, tensor<16xf32>, none) -> tensor<256x16x16x16xf32> } // CHECK-LABEL: @do_not_fuse_bconv2d_padding_same_twice -func @do_not_fuse_bconv2d_padding_same_twice(%arg0: tensor<256x32x32x1xi32>) -> tensor<256x34x34x16xf32> { +func.func @do_not_fuse_bconv2d_padding_same_twice(%arg0: tensor<256x32x32x1xi32>) -> tensor<256x34x34x16xf32> { %cst = arith.constant dense<1.0> : tensor<3x3x3x16xf32> %cst0 = arith.constant dense<1.0> : tensor %cst1 = arith.constant dense<[[0, 0], [1, 1], [1, 1], [0, 0]]> : tensor<4x2xi32> @@ -236,7 +236,7 @@ func @do_not_fuse_bconv2d_padding_same_twice(%arg0: tensor<256x32x32x1xi32>) -> } // CHECK-LABEL: @do_not_fuse_bconv2d_unsupported_constant_padding -func @do_not_fuse_bconv2d_unsupported_constant_padding(%arg0: tensor<256x32x32x1xi32>) -> tensor<256x32x32x16xf32> { +func.func @do_not_fuse_bconv2d_unsupported_constant_padding(%arg0: tensor<256x32x32x1xi32>) -> tensor<256x32x32x16xf32> { %cst = arith.constant dense<1.0> : tensor<3x3x3x16xf32> %cst0 = arith.constant dense<0.0> : tensor %cst1 = arith.constant dense<[[0, 0], [1, 1], [1, 1], [0, 0]]> : tensor<4x2xi32> @@ -251,7 +251,7 @@ func @do_not_fuse_bconv2d_unsupported_constant_padding(%arg0: tensor<256x32x32x1 } // CHECK-LABEL: @do_not_fuse_bconv2d_padding_wrong_size -func @do_not_fuse_bconv2d_padding_wrong_size(%arg0: tensor<256x32x32x1xi32>) -> tensor<256x34x34x16xf32> { +func.func @do_not_fuse_bconv2d_padding_wrong_size(%arg0: tensor<256x32x32x1xi32>) -> tensor<256x34x34x16xf32> { %cst = arith.constant dense<1.0> : tensor<3x3x3x16xf32> %cst0 = arith.constant dense<1.0> : tensor %cst1 = arith.constant dense<[[0, 0], [2, 2], [2, 2], [0, 0]]> : tensor<4x2xi32> @@ -266,7 +266,7 @@ func @do_not_fuse_bconv2d_padding_wrong_size(%arg0: tensor<256x32x32x1xi32>) -> } // CHECK-LABEL: @do_not_fuse_bconv2d_unsymmetric_padding -func @do_not_fuse_bconv2d_unsymmetric_padding(%arg0: tensor<256x32x32x1xi32>) -> tensor<256x32x32x16xf32> { +func.func @do_not_fuse_bconv2d_unsymmetric_padding(%arg0: tensor<256x32x32x1xi32>) -> tensor<256x32x32x16xf32> { %cst = arith.constant dense<1.0> : tensor<3x3x3x16xf32> %cst0 = arith.constant dense<1.0> : tensor %cst1 = arith.constant dense<[[0, 0], [2, 0], [2, 0], [0, 0]]> : tensor<4x2xi32> @@ -281,7 +281,7 @@ func @do_not_fuse_bconv2d_unsymmetric_padding(%arg0: tensor<256x32x32x1xi32>) -> } // CHECK-LABEL: @do_not_fuse_bconv2d_non_spatial_padding -func @do_not_fuse_bconv2d_non_spatial_padding(%arg0: tensor<256x32x32x1xi32>) -> tensor<258x32x32x16xf32> { +func.func @do_not_fuse_bconv2d_non_spatial_padding(%arg0: tensor<256x32x32x1xi32>) -> tensor<258x32x32x16xf32> { %cst = arith.constant dense<1.0> : tensor<3x3x5x16xf32> %cst0 = arith.constant dense<1.0> : tensor %cst1 = arith.constant dense<[[1, 1], [1, 1], [1, 1], [1, 1]]> : tensor<4x2xi32> diff --git a/larq_compute_engine/mlir/tests/quantize.mlir 
b/larq_compute_engine/mlir/tests/quantize.mlir index a0c7393ef..f3ac3f3b6 100644 --- a/larq_compute_engine/mlir/tests/quantize.mlir +++ b/larq_compute_engine/mlir/tests/quantize.mlir @@ -1,7 +1,7 @@ // RUN: lce-tf-opt %s -lce-quantize -verify-diagnostics | FileCheck %s // CHECK-LABEL: quantize_bconv2d -func @quantize_bconv2d(%arg0: tensor<1x224x224x1xi32>, %arg1: tensor<32x3x3x1xi32>, %arg2: none) -> tensor<1x112x112x32x!quant.uniform> { +func.func @quantize_bconv2d(%arg0: tensor<1x224x224x1xi32>, %arg1: tensor<32x3x3x1xi32>, %arg2: none) -> tensor<1x112x112x32x!quant.uniform> { %cst0 = arith.constant dense<-1.23697901> : tensor<32xf32> %0 = "tfl.quantize"(%cst0) {qtype = tensor<32x!quant.uniform>} : (tensor<32xf32>) -> tensor<32x!quant.uniform> %1 = "tfl.dequantize"(%0) : (tensor<32x!quant.uniform>) -> tensor<32xf32> @@ -19,7 +19,7 @@ func @quantize_bconv2d(%arg0: tensor<1x224x224x1xi32>, %arg1: tensor<32x3x3x1xi3 } // CHECK-LABEL: quantize_bitpacked_bconv2d -func @quantize_bitpacked_bconv2d(%arg0: tensor<1x224x224x1xi32>, %arg1: tensor<32x3x3x1xi32>, %arg2: none, %arg3: none, %arg4: tensor<32xi32>) -> tensor<1x112x112x32x!quant.uniform> { +func.func @quantize_bitpacked_bconv2d(%arg0: tensor<1x224x224x1xi32>, %arg1: tensor<32x3x3x1xi32>, %arg2: none, %arg3: none, %arg4: tensor<32xi32>) -> tensor<1x112x112x32x!quant.uniform> { %0 = "lq.Bconv2d"(%arg0, %arg1, %arg2, %arg3, %arg4) {channels_in = 3 : i32, dilation_height_factor = 2 : i32, dilation_width_factor = 3 : i32, fused_activation_function = "NONE", pad_values = 0 : i32, padding = "SAME", stride_height = 4 : i32, stride_width = 5 : i32} : (tensor<1x224x224x1xi32>, tensor<32x3x3x1xi32>, none, none, tensor<32xi32>) -> tensor<1x112x112x32xf32> %1 = "tfl.quantize"(%0) {qtype = tensor<1x112x112x32x!quant.uniform>} : (tensor<1x112x112x32xf32>) -> tensor<1x112x112x32x!quant.uniform> return %1 : tensor<1x112x112x32x!quant.uniform> @@ -29,7 +29,7 @@ func @quantize_bitpacked_bconv2d(%arg0: tensor<1x224x224x1xi32>, %arg1: tensor<3 } // CHECK-LABEL: quantize_lce_dequantize -func @quantize_lce_dequantize(%arg0: tensor<1x112x112x1xi32>) -> tensor<1x112x112x32x!quant.uniform> { +func.func @quantize_lce_dequantize(%arg0: tensor<1x112x112x1xi32>) -> tensor<1x112x112x32x!quant.uniform> { %0 = "lq.Dequantize"(%arg0) : (tensor<1x112x112x1xi32>) -> tensor<1x112x112x32xf32> %1 = "tfl.quantize"(%0) {qtype = tensor<1x112x112x32x!quant.uniform>} : (tensor<1x112x112x32xf32>) -> tensor<1x112x112x32x!quant.uniform> return %1 : tensor<1x112x112x32x!quant.uniform> @@ -39,7 +39,7 @@ func @quantize_lce_dequantize(%arg0: tensor<1x112x112x1xi32>) -> tensor<1x112x11 } // CHECK-LABEL: dequantize_lce_quantize -func @dequantize_lce_quantize(%arg0: tensor<1x112x112x32x!quant.uniform>) -> tensor<1x112x112x1xi32> { +func.func @dequantize_lce_quantize(%arg0: tensor<1x112x112x32x!quant.uniform>) -> tensor<1x112x112x1xi32> { %0 = "tfl.dequantize"(%arg0) : (tensor<1x112x112x32x!quant.uniform>) -> tensor<1x112x112x32xf32> %1 = "lq.Quantize"(%0) : (tensor<1x112x112x32xf32>) -> tensor<1x112x112x1xi32> return %1 : tensor<1x112x112x1xi32> diff --git a/larq_compute_engine/mlir/tests/set_batch_size.mlir b/larq_compute_engine/mlir/tests/set_batch_size.mlir index 9c4d83eff..f141dba7c 100644 --- a/larq_compute_engine/mlir/tests/set_batch_size.mlir +++ b/larq_compute_engine/mlir/tests/set_batch_size.mlir @@ -1,7 +1,7 @@ // RUN: lce-tf-opt %s -mlir-setbatchsize -verify-diagnostics | FileCheck %s // CHECK-LABEL: @simple -func @simple(%arg0: tensor, %arg1: tensor<2x6xf32>) -> (tensor) { 
+func.func @simple(%arg0: tensor, %arg1: tensor<2x6xf32>) -> (tensor) { %0 = "tf.AddV2"(%arg0, %arg1) : (tensor, tensor<2x6xf32>) -> tensor return %0 : tensor // CHECK: %arg0: tensor<1x6xf32> @@ -18,7 +18,7 @@ func @simple(%arg0: tensor, %arg1: tensor<2x6xf32>) -> (tensor // Both inputs have a dynamic batch size // CHECK-LABEL: @dual_input_model -func @dual_input_model(%arg0: tensor {tf_saved_model.index_path = ["input_2"]}, %arg1: tensor {tf_saved_model.index_path = ["input_1"]}, %arg2: tensor>> {tf_saved_model.bound_input = @"dense/bias"}, %arg3: tensor>> {tf_saved_model.bound_input = @"dense/kernel"}) -> (tensor {tf_saved_model.index_path = ["tf.__operators__.add"]}) attributes {tf.entry_function = {control_outputs = "", inputs = "serving_default_input_2:0,serving_default_input_1:0", outputs = "StatefulPartitionedCall:0"}, tf_saved_model.exported_names = ["serving_default"]} { +func.func @dual_input_model(%arg0: tensor {tf_saved_model.index_path = ["input_2"]}, %arg1: tensor {tf_saved_model.index_path = ["input_1"]}, %arg2: tensor>> {tf_saved_model.bound_input = @"dense/bias"}, %arg3: tensor>> {tf_saved_model.bound_input = @"dense/kernel"}) -> (tensor {tf_saved_model.index_path = ["tf.__operators__.add"]}) attributes {tf.entry_function = {control_outputs = "", inputs = "serving_default_input_2:0,serving_default_input_1:0", outputs = "StatefulPartitionedCall:0"}, tf_saved_model.exported_names = ["serving_default"]} { %0 = "tf.ReadVariableOp"(%arg2) {device = ""} : (tensor>>) -> tensor<6xf32> %1 = "tf.ReadVariableOp"(%arg3) {device = ""} : (tensor>>) -> tensor<4x6xf32> %2 = "tf.MatMul"(%arg1, %1) {device = "", transpose_a = false, transpose_b = false} : (tensor, tensor<4x6xf32>) -> tensor @@ -36,7 +36,7 @@ func @dual_input_model(%arg0: tensor {tf_saved_model.index_path = ["inp // This is the same model, but one of the two inputs has been given a fixed batch size in Python // CHECK-LABEL: @dual_input_one_fixed_size -func @dual_input_one_fixed_size(%arg0: tensor {tf_saved_model.index_path = ["input_2"]}, %arg1: tensor<1x4xf32> {tf_saved_model.index_path = ["input_1"]}, %arg2: tensor>> {tf_saved_model.bound_input = @"dense/bias"}, %arg3: tensor>> {tf_saved_model.bound_input = @"dense/kernel"}) -> (tensor {tf_saved_model.index_path = ["tf.__operators__.add"]}) attributes {tf.entry_function = {control_outputs = "", inputs = "serving_default_input_2:0,serving_default_input_1:0", outputs = "StatefulPartitionedCall:0"}, tf_saved_model.exported_names = ["serving_default"]} { +func.func @dual_input_one_fixed_size(%arg0: tensor {tf_saved_model.index_path = ["input_2"]}, %arg1: tensor<1x4xf32> {tf_saved_model.index_path = ["input_1"]}, %arg2: tensor>> {tf_saved_model.bound_input = @"dense/bias"}, %arg3: tensor>> {tf_saved_model.bound_input = @"dense/kernel"}) -> (tensor {tf_saved_model.index_path = ["tf.__operators__.add"]}) attributes {tf.entry_function = {control_outputs = "", inputs = "serving_default_input_2:0,serving_default_input_1:0", outputs = "StatefulPartitionedCall:0"}, tf_saved_model.exported_names = ["serving_default"]} { %0 = "tf.ReadVariableOp"(%arg2) {device = ""} : (tensor>>) -> tensor<6xf32> %1 = "tf.ReadVariableOp"(%arg3) {device = ""} : (tensor>>) -> tensor<4x6xf32> %2 = "tf.MatMul"(%arg1, %1) {device = "", transpose_a = false, transpose_b = false} : (tensor<1x4xf32>, tensor<4x6xf32>) -> tensor<1x6xf32> diff --git a/larq_compute_engine/mlir/tf_tfl_passes.cc b/larq_compute_engine/mlir/tf_tfl_passes.cc index 20d622647..cd51f9069 100644 --- a/larq_compute_engine/mlir/tf_tfl_passes.cc 
+++ b/larq_compute_engine/mlir/tf_tfl_passes.cc
@@ -23,29 +23,34 @@ const char kTFLiteDataLayout[] = "NHWC";
} // namespace

namespace {
-void AddQuantizationPasses(const mlir::TFL::QuantizationSpecs& quant_specs,
-                           mlir::OpPassManager* pass_manager) {
-  pass_manager->addNestedPass<mlir::FuncOp>(
+void AddQuantizationPasses(const mlir::quant::QuantizationSpecs& quant_specs,
+                           mlir::OpPassManager& pass_manager) {
+  pass_manager.addNestedPass<mlir::func::FuncOp>(
       mlir::TFL::CreatePrepareQuantizePass(quant_specs));
-  pass_manager->addNestedPass<mlir::FuncOp>(mlir::TFL::CreateLCEQuantizePass());
+  pass_manager.addNestedPass<mlir::func::FuncOp>(
+      mlir::TFL::CreateLCEQuantizePass());
   if (quant_specs.default_ranges.first.hasValue() ||
       quant_specs.default_ranges.second.hasValue()) {
-    pass_manager->addNestedPass<mlir::FuncOp>(
+    pass_manager.addNestedPass<mlir::func::FuncOp>(
         mlir::TFL::CreateDefaultQuantParamsPass(
             quant_specs.default_ranges.first.getValueOr(0.0),
             quant_specs.default_ranges.second.getValueOr(0.0),
             quant_specs.IsSignedInferenceType()));
-    pass_manager->addNestedPass<mlir::FuncOp>(
+    pass_manager.addNestedPass<mlir::func::FuncOp>(
         mlir::TFL::CreateLCEQuantizePass());
   }
-  pass_manager->addNestedPass<mlir::FuncOp>(mlir::TFL::CreateQuantizePass());
+  pass_manager.addNestedPass<mlir::func::FuncOp>(
+      mlir::TFL::CreateQuantizePass());
   bool emit_quant_adaptor_ops =
       quant_specs.inference_type != quant_specs.inference_input_type;
-  pass_manager->addNestedPass<mlir::FuncOp>(
+  pass_manager.addNestedPass<mlir::func::FuncOp>(
       mlir::TFL::CreatePostQuantizePass(emit_quant_adaptor_ops));
-  pass_manager->addNestedPass<mlir::FuncOp>(mlir::TFL::CreateLCEQuantizePass());
-  pass_manager->addNestedPass<mlir::FuncOp>(
+  pass_manager.addNestedPass<mlir::func::FuncOp>(
+      mlir::TFL::CreateLCEQuantizePass());
+  pass_manager.addNestedPass<mlir::func::FuncOp>(
       mlir::TFL::CreatePostQuantizePass(emit_quant_adaptor_ops));
+  pass_manager.addNestedPass<mlir::func::FuncOp>(
+      mlir::TFL::CreateOptimizeOpOrderPass());
}
} // namespace

@@ -57,14 +62,14 @@ void AddPreVariableFreezingTFToLCETFLConversionPasses(
  // This pass wraps all the tf.FakeQuant ops in a custom op so they are not
  // folded before being converted to tfl.quantize and tfl.dequantize ops.
  auto wrapped_ops = mlir::TFL::AllTfFakeQuantOps();
-  pass_manager->addNestedPass<mlir::FuncOp>(
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
       mlir::TFL::CreateRaiseCustomOpsPass(wrapped_ops));

  mlir::TF::StandardPipelineOptions standard_pipeline_options;
  standard_pipeline_options.enable_inliner = false;
  standard_pipeline_options.form_clusters = false;
  mlir::TF::CreateTFStandardPipeline(*pass_manager, standard_pipeline_options);
-  pass_manager->addNestedPass<mlir::FuncOp>(
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
       mlir::TF::CreateDeviceIndexSelectorPass());

  // Add canonicalize pass to remove no-op session initializer pass.
@@ -82,8 +87,10 @@
  // This decomposes resource ops like ResourceGather into read-variable op
  // followed by gather. This is used when the saved model import path is used
  // during which resources dont get frozen in the python layer.
-  pass_manager->addNestedPass<mlir::FuncOp>(
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
       mlir::TFDevice::CreateDecomposeResourceOpsPass());
+
+  pass_manager->addPass(mlir::TF::CreateTFRegionControlFlowToFunctional());
}

// This is the later part of the conversion in isolation. This enables a caller
@@ -91,7 +98,7 @@
// it.
void AddPostVariableFreezingTFToLCETFLConversionPasses(
    llvm::StringRef saved_model_dir,
-    const mlir::TFL::QuantizationSpecs& quant_specs,
+    const mlir::quant::QuantizationSpecs& quant_specs,
    mlir::OpPassManager* pass_manager, const LCETarget target) {
  // Note:
  // We need to fuse composite ops before LowerStaticTensorList pass.
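Aside: the mechanical change running through this file is that the MLIR shipped with TF 2.9 drops the dedicated FunctionPass base class, so function-scoped passes become OperationPass anchored on mlir::func::FuncOp, and every addNestedPass call spells out that anchor type explicitly. A minimal sketch of the new idiom, with hypothetical names (ExamplePass, BuildExamplePipeline) that are not part of this PR:

#include <memory>

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/Passes.h"

namespace {
// Was: struct ExamplePass : PassWrapper<ExamplePass, FunctionPass>
struct ExamplePass
    : public mlir::PassWrapper<ExamplePass,
                               mlir::OperationPass<mlir::func::FuncOp>> {
  llvm::StringRef getArgument() const final { return "example-pass"; }
  void runOnOperation() override {
    // Was: OwningRewritePatternList + getFunction()
    mlir::RewritePatternSet patterns(&getContext());
    mlir::func::FuncOp func = getOperation();
    (void)mlir::applyPatternsAndFoldGreedily(func, std::move(patterns));
  }
};
}  // namespace

void BuildExamplePipeline(mlir::OpPassManager& pass_manager) {
  // The anchor op type is now an explicit template argument at every call
  // site, which is the source of the addNestedPass<mlir::func::FuncOp> churn
  // in the hunks above and below.
  pass_manager.addNestedPass<mlir::func::FuncOp>(
      std::make_unique<ExamplePass>());
  pass_manager.addNestedPass<mlir::func::FuncOp>(
      mlir::createCanonicalizerPass());
}

The same substitution explains the runOnFunction()/getFunction() to runOnOperation()/getOperation() renames in the pass sources later in this diff.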
@@ -99,8 +106,6 @@ void AddPostVariableFreezingTFToLCETFLConversionPasses(
  // Enable fusing composite ops that can be lowered to built-in TFLite ops.
  pass_manager->addPass(mlir::TFL::CreatePrepareCompositeFunctionsPass());

-  pass_manager->addPass(mlir::TF::CreateTFRegionControlFlowToFunctional());
-
  pass_manager->addPass(mlir::createInlinerPass());
  pass_manager->addPass(mlir::createSymbolDCEPass());

@@ -113,7 +118,8 @@
  // Set the batch size of the function input to 1 and let shape inference
  // propagate this in the next pass.
-  pass_manager->addNestedPass<mlir::FuncOp>(mlir::CreateSetBatchSizePass());
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
+      mlir::CreateSetBatchSizePass());

  // Add a shape inference pass to optimize away the unnecessary casts.
  pass_manager->addPass(mlir::TF::CreateTFShapeInferencePass());
@@ -131,7 +137,8 @@

  // Remove passthrough ops early so constant folding can happen before
  // LCE ops are injected
-  pass_manager->addNestedPass<mlir::FuncOp>(mlir::TFL::CreateOpRemovalPass());
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
+      mlir::TFL::CreateOpRemovalPass());

  // The following pass used to be just after createSymbolDCEPass but we move it
  // before createCanonicalizerPass because without it, the tf.Sign op is not
@@ -154,13 +161,14 @@
  // 2. The body and cond are inlined.
  // 3. We need to do this before while canonicalization, otherwise it would be
  // difficult to find dependencies.
-  pass_manager->addNestedPass<mlir::FuncOp>(
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
       mlir::TFL::CreateReduceWhileOperandsPass());

  // Canonicalization includes const folding, which is utilized here to optimize
  // away ops that can't get constant folded after PrepareTF pass. For example,
  // tf.Conv2D is split into tf.Transpose and tfl.Conv2D.
-  pass_manager->addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
-  pass_manager->addNestedPass<mlir::FuncOp>(mlir::createCSEPass());
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
+      mlir::createCanonicalizerPass());
+  pass_manager->addNestedPass<mlir::func::FuncOp>(mlir::createCSEPass());

  // This pass does dead code elimination based on symbol visibility.
  pass_manager->addPass(mlir::createSymbolDCEPass());
@@ -171,16 +179,17 @@
  mlir::TF::LayoutOptimizationPipelineOptions layout_optimization_options;
  layout_optimization_options.force_data_format = kTFLiteDataLayout;
  layout_optimization_options.skip_fold_transpose_in_ops = true;
-  mlir::TF::CreateLayoutOptimizationPipeline(pass_manager->nest<mlir::FuncOp>(),
-                                             layout_optimization_options);
+  mlir::TF::CreateLayoutOptimizationPipeline(
+      pass_manager->nest<mlir::func::FuncOp>(), layout_optimization_options);

  // Inject Larq Compute Engine Ops
-  pass_manager->addNestedPass<mlir::FuncOp>(
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
       mlir::TFL::CreatePrepareLCEPass(target));

  // Prepare for TFLite dialect, rerun canonicalization, and then legalize to
  // the TFLite dialect.
-  pass_manager->addNestedPass<mlir::FuncOp>(
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
       mlir::TFL::CreatePrepareTFPass(true, false));
-  pass_manager->addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
+      mlir::createCanonicalizerPass());

  // Add a shape inference pass to optimize away the unnecessary casts.
  // This also fixes the unranked shapes due to TF ops constant folding.
// TODO(fengliuai): remove this pass if TableGen patterns have a better
@@ -192,25 +201,25 @@
  pass_manager->addPass(mlir::createInlinerPass());

  // This pass removes the asset file dependencies in hash table use cases.
-  pass_manager->addNestedPass<mlir::FuncOp>(
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
       mlir::TF::CreateInitTextFileToImportPass(saved_model_dir.str()));

  // This pass removes the asset file dependencies in hash table use cases.
-  pass_manager->addNestedPass<mlir::FuncOp>(
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
       mlir::TF::CreateInitTextFileToImportPass());

-  pass_manager->addNestedPass<mlir::FuncOp>(
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
       mlir::TFL::CreateLegalizeTFPass(true));
  pass_manager->addPass(mlir::TFL::CreateAnalyzeVariablesPass());
  pass_manager->addPass(mlir::TFL::CreateLegalizeVariablesPass());
  pass_manager->addPass(mlir::TFL::CreateLegalizeHashTablesPass());
-  pass_manager->addNestedPass<mlir::FuncOp>(
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
       mlir::TFL::CreateOptimizeLCEPass(target));
-  pass_manager->addNestedPass<mlir::FuncOp>(
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
       mlir::TFL::CreateOptimizePass(true));
-  pass_manager->addNestedPass<mlir::FuncOp>(
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
       mlir::TFL::CreateOptimizeLCEPass(target));
-  pass_manager->addNestedPass<mlir::FuncOp>(
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
       mlir::TFL::CreateBitpackWeightsLCEPass());

  // This pass operates on TensorFlow ops but is triggered after legalization
@@ -218,18 +227,20 @@
  // are removed during legalization.
  pass_manager->addPass(mlir::TFL::CreateOptimizeFunctionalOpsPass());
  std::vector<std::string> empty_wrapped_ops({});
-  pass_manager->addNestedPass<mlir::FuncOp>(
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
       mlir::TFL::CreateRaiseCustomOpsPass(empty_wrapped_ops));
  pass_manager->addPass(mlir::createSymbolDCEPass());

-  pass_manager->addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
-  pass_manager->addNestedPass<mlir::FuncOp>(mlir::createCSEPass());
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
+      mlir::createCanonicalizerPass());
+  pass_manager->addNestedPass<mlir::func::FuncOp>(mlir::createCSEPass());

-  pass_manager->addNestedPass<mlir::FuncOp>(mlir::TFL::CreateFusePaddingPass());
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
+      mlir::TFL::CreateFusePaddingPass());

  // Run quantization after all the floating point model conversion is
  // completed.
  if (quant_specs.RunPropagationAndRewriteQuantizationPasses()) {
-    AddQuantizationPasses(quant_specs, pass_manager);
+    AddQuantizationPasses(quant_specs, *pass_manager);
  }

  pass_manager->addPass(mlir::createCanonicalizerPass());
@@ -240,7 +251,7 @@
  // it's not desired by TFL. This pass serves as a "fix" pass to split the
  // merged inputs until we have 1st class variable support or reuse
  // tf.variable to model this.
-  pass_manager->addNestedPass<mlir::FuncOp>(
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
       mlir::TFL::CreateSplitMergedOperandsPass());

  // Add CallOnceOp when there is a session initializer function in tf saved
@@ -249,10 +260,11 @@
       mlir::TFL::CreateInsertCallOnceOpFromSessionInitializerPass());
  pass_manager->addPass(mlir::TFL::CreateUnfoldLargeSplatConstantPass());
  pass_manager->addPass(mlir::TFL::CreateWhileOutlinePass());
-  pass_manager->addNestedPass<mlir::FuncOp>(
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
       mlir::TFL::CreateRuntimeVerifyPass());

-  pass_manager->addNestedPass<mlir::FuncOp>(mlir::TFL::CreateLegalizeLCEPass());
+  pass_manager->addNestedPass<mlir::func::FuncOp>(
+      mlir::TFL::CreateLegalizeLCEPass());
}

} // namespace tensorflow
diff --git a/larq_compute_engine/mlir/tf_tfl_passes.h b/larq_compute_engine/mlir/tf_tfl_passes.h
index f5d3d923d..18a7dbe8f 100644
--- a/larq_compute_engine/mlir/tf_tfl_passes.h
+++ b/larq_compute_engine/mlir/tf_tfl_passes.h
@@ -14,7 +14,7 @@ void AddPreVariableFreezingTFToLCETFLConversionPasses(

void AddPostVariableFreezingTFToLCETFLConversionPasses(
    llvm::StringRef saved_model_dir,
-    const mlir::TFL::QuantizationSpecs& quant_specs,
+    const mlir::quant::QuantizationSpecs& quant_specs,
    mlir::OpPassManager* pass_manager, const LCETarget target = LCETarget::ARM);

} // namespace tensorflow
diff --git a/larq_compute_engine/mlir/tf_to_tfl_flatbuffer.cc b/larq_compute_engine/mlir/tf_to_tfl_flatbuffer.cc
index e5a4a9520..9f82f4397 100644
--- a/larq_compute_engine/mlir/tf_to_tfl_flatbuffer.cc
+++ b/larq_compute_engine/mlir/tf_to_tfl_flatbuffer.cc
@@ -57,7 +57,7 @@ class TruncateOpOrArgLocNameMapper : public OpOrArgLocNameMapper {
} // namespace

Status ConvertTFExecutorToTFLOrFlatbuffer(
    mlir::ModuleOp module, bool export_to_mlir, const LCETarget target,
-    mlir::TFL::QuantizationSpecs quant_specs,
+    mlir::quant::QuantizationSpecs quant_specs,
    const std::unordered_set<std::string>& saved_model_tags,
    llvm::StringRef saved_model_dir, llvm::Optional<Session*> session,
    std::string* result) {
diff --git a/larq_compute_engine/mlir/tf_to_tfl_flatbuffer.h b/larq_compute_engine/mlir/tf_to_tfl_flatbuffer.h
index f1aa84d1a..03acba570 100644
--- a/larq_compute_engine/mlir/tf_to_tfl_flatbuffer.h
+++ b/larq_compute_engine/mlir/tf_to_tfl_flatbuffer.h
@@ -15,7 +15,7 @@ namespace tensorflow {
// https://github.com/tensorflow/tensorflow/blob/v2.8.0/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h#L60-L78
Status ConvertTFExecutorToTFLOrFlatbuffer(
    mlir::ModuleOp module, bool export_to_mlir, const LCETarget target,
-    mlir::TFL::QuantizationSpecs quant_specs,
+    mlir::quant::QuantizationSpecs quant_specs,
    const std::unordered_set<std::string>& saved_model_tags,
    llvm::StringRef saved_model_dir, llvm::Optional<Session*> session,
    std::string* result);
diff --git a/larq_compute_engine/mlir/transforms/bitpack_activations_patterns.td b/larq_compute_engine/mlir/transforms/bitpack_activations_patterns.td
index 611d5b221..87f024eac 100644
--- a/larq_compute_engine/mlir/transforms/bitpack_activations_patterns.td
+++ b/larq_compute_engine/mlir/transforms/bitpack_activations_patterns.td
@@ -1,4 +1,5 @@
-include "mlir/Dialect/StandardOps/IR/Ops.td"
+include "mlir/IR/PatternBase.td"
+include "mlir/Dialect/Func/IR/FuncOps.td"
include "mlir/Dialect/Arithmetic/IR/ArithmeticOps.td"
include "tensorflow/compiler/mlir/lite/ir/tfl_ops.td"
include "larq_compute_engine/mlir/ir/lce_ops.td"
@@ -10,7 +11,8 @@ def F32ElementsAttr : ElementsAttrBase<

// Checks if the value has only one user.
def HasOneUse : Constraint<CPred<"$0.hasOneUse()">>;

-def CreateNoneAttrValue : NativeCodeCall<"$_builder.getUnitAttr()">;
+def CreateNoneValue : NativeCodeCall<
+  "$_builder.create<TFL::NoValueOp>($0.getLoc(), $_builder.getUnitAttr())">;

def GetSignsOfVectorAndBroadcast4D : NativeCodeCall<"GetSignsOfVectorAndBroadcast4D($0)">;
def GetBitpackedOutputThresholds : NativeCodeCall<"GetBitpackedOutputThresholds($_builder, $0, $1, $2, $3)">;

@@ -21,7 +23,7 @@ class WriteBitpackedActivationsPat
diff --git a/larq_compute_engine/mlir/transforms/bitpack_weights.cc b/larq_compute_engine/mlir/transforms/bitpack_weights.cc
-struct BitpackWeightsLCE : public PassWrapper<BitpackWeightsLCE, FunctionPass> {
+struct BitpackWeightsLCE
+    : public PassWrapper<BitpackWeightsLCE, OperationPass<mlir::func::FuncOp>> {
  llvm::StringRef getArgument() const final { return "tfl-lce-bitpack-weights"; }
  llvm::StringRef getDescription() const final { return "Bitpack binary weights"; }
-  void runOnFunction() override;
+  void runOnOperation() override;
};

bool IsConv2DFilter(Attribute filter) {
@@ -30,9 +31,9 @@
#include "larq_compute_engine/mlir/transforms/generated_bitpack_weights.inc"

-void BitpackWeightsLCE::runOnFunction() {
-  OwningRewritePatternList patterns(&getContext());
-  auto func = getFunction();
+void BitpackWeightsLCE::runOnOperation() {
+  RewritePatternSet patterns(&getContext());
+  auto func = getOperation();
  TFL::populateWithGenerated(patterns);

  (void)applyPatternsAndFoldGreedily(func, std::move(patterns));
diff --git a/larq_compute_engine/mlir/transforms/bitpack_weights_patterns.td b/larq_compute_engine/mlir/transforms/bitpack_weights_patterns.td
index 47f978d33..b4bc91c50 100644
--- a/larq_compute_engine/mlir/transforms/bitpack_weights_patterns.td
+++ b/larq_compute_engine/mlir/transforms/bitpack_weights_patterns.td
@@ -1,4 +1,5 @@
-include "mlir/Dialect/StandardOps/IR/Ops.td"
+include "mlir/IR/PatternBase.td"
+include "mlir/Dialect/Func/IR/FuncOps.td"
include "mlir/Dialect/Arithmetic/IR/ArithmeticOps.td"
include "tensorflow/compiler/mlir/lite/ir/tfl_ops.td"
include "larq_compute_engine/mlir/ir/lce_ops.td"
diff --git a/larq_compute_engine/mlir/transforms/fuse_padding.cc b/larq_compute_engine/mlir/transforms/fuse_padding.cc
index 2761478e6..c8b90ff05 100644
--- a/larq_compute_engine/mlir/transforms/fuse_padding.cc
+++ b/larq_compute_engine/mlir/transforms/fuse_padding.cc
@@ -36,7 +36,7 @@ bool IsSamePaddingPartial(Attribute paddings_attr, Value input, Value output,
#include "larq_compute_engine/mlir/transforms/generated_fuse_padding.inc"

// Prepare LCE operations in functions for subsequent legalization.
-struct FusePadding : public PassWrapper<FusePadding, FunctionPass> {
+struct FusePadding : public PassWrapper<FusePadding, OperationPass<mlir::func::FuncOp>> {
  llvm::StringRef getArgument() const final { return "tfl-fuse-padding"; }
  llvm::StringRef getDescription() const final {
    return "Fuse padding ops into (Depthwise)Convs";
@@ -44,10 +44,10 @@ struct FusePadding : public PassWrapper<FusePadding, FunctionPass> {
  FusePadding() = default;
  FusePadding(const FusePadding& pass) {}

-  void runOnFunction() override {
+  void runOnOperation() override {
    auto* ctx = &getContext();
-    OwningRewritePatternList patterns(ctx);
-    auto func = getFunction();
+    RewritePatternSet patterns(ctx);
+    auto func = getOperation();
    populateWithGenerated(patterns);
    (void)applyPatternsAndFoldGreedily(func, std::move(patterns));
  }
diff --git a/larq_compute_engine/mlir/transforms/fuse_padding.td b/larq_compute_engine/mlir/transforms/fuse_padding.td
index d94825b4a..d49fe5fcd 100644
--- a/larq_compute_engine/mlir/transforms/fuse_padding.td
+++ b/larq_compute_engine/mlir/transforms/fuse_padding.td
@@ -1,4 +1,5 @@
-include "mlir/Dialect/StandardOps/IR/Ops.td"
+include "mlir/IR/PatternBase.td"
+include "mlir/Dialect/Func/IR/FuncOps.td"
include "mlir/Dialect/Arithmetic/IR/ArithmeticOps.td"
include "tensorflow/compiler/mlir/lite/ir/tfl_ops.td"
diff --git a/larq_compute_engine/mlir/transforms/legalize_tflite.cc b/larq_compute_engine/mlir/transforms/legalize_tflite.cc
index 861ca84af..b0bf56848 100644
--- a/larq_compute_engine/mlir/transforms/legalize_tflite.cc
+++ b/larq_compute_engine/mlir/transforms/legalize_tflite.cc
@@ -9,7 +9,7 @@ namespace TFL {
namespace {

-struct LegalizeLCE : public PassWrapper<LegalizeLCE, FunctionPass> {
+struct LegalizeLCE : public PassWrapper<LegalizeLCE, OperationPass<mlir::func::FuncOp>> {
  llvm::StringRef getArgument() const final { return "tfl-legalize-lce"; }
  llvm::StringRef getDescription() const final {
    return "Legalize LCE ops in TensorFlow Lite dialect";
@@ -17,7 +17,7 @@ struct LegalizeLCE : public PassWrapper<LegalizeLCE, FunctionPass> {
  void getDependentDialects(DialectRegistry& registry) const override {
    registry.insert<TFL::TensorFlowLiteDialect>();
  }
-  void runOnFunction() override;
+  void runOnOperation() override;
};

template <typename LarqOp>
@@ -41,12 +41,12 @@ struct LegalizeToCustomOp : public OpRewritePattern<LarqOp> {
  }
};

-void LegalizeLCE::runOnFunction() {
-  OwningRewritePatternList patterns(&getContext());
+void LegalizeLCE::runOnOperation() {
+  RewritePatternSet patterns(&getContext());
  auto* ctx = &getContext();
-  auto func = getFunction();
+  auto func = getOperation();

-  patterns.insert<
+  patterns.add<
      LegalizeToCustomOp<lq::QuantizeOp>, LegalizeToCustomOp<lq::DequantizeOp>,
      LegalizeToCustomOp<lq::Bconv2dOp>, LegalizeToCustomOp<lq::BMaxPool2dOp>>(
      ctx);
diff --git a/larq_compute_engine/mlir/transforms/op_removal.cc b/larq_compute_engine/mlir/transforms/op_removal.cc
index d508eee5d..192543c6a 100644
--- a/larq_compute_engine/mlir/transforms/op_removal.cc
+++ b/larq_compute_engine/mlir/transforms/op_removal.cc
@@ -1,4 +1,4 @@
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@@ -11,19 +11,19 @@ namespace {

// Op removal of pass through ops to make following patterns easier and enable
// early constant folding
-struct OpRemoval : public PassWrapper<OpRemoval, FunctionPass> {
+struct OpRemoval : public PassWrapper<OpRemoval, OperationPass<mlir::func::FuncOp>> {
  llvm::StringRef getArgument() const final { return "lce-op-removal-tf"; }
  llvm::StringRef getDescription() const final {
    return "Remove pass through TensorFlow ops";
  }
-  void runOnFunction() override;
+  void runOnOperation() override;
};

#include "larq_compute_engine/mlir/transforms/generated_op_removal.inc"

-void OpRemoval::runOnFunction() {
-  OwningRewritePatternList patterns(&getContext());
-  auto func = getFunction();
+void OpRemoval::runOnOperation() {
+  RewritePatternSet patterns(&getContext());
+  auto func = getOperation();
  TFL::populateWithGenerated(patterns);

  (void)applyPatternsAndFoldGreedily(func, std::move(patterns));
diff --git a/larq_compute_engine/mlir/transforms/op_removal_patterns.td b/larq_compute_engine/mlir/transforms/op_removal_patterns.td
index 333985559..1ed7a073c 100644
--- a/larq_compute_engine/mlir/transforms/op_removal_patterns.td
+++ b/larq_compute_engine/mlir/transforms/op_removal_patterns.td
@@ -1,4 +1,5 @@
-include "mlir/Dialect/StandardOps/IR/Ops.td"
+include "mlir/IR/PatternBase.td"
+include "mlir/Dialect/Func/IR/FuncOps.td"
include "mlir/Dialect/Arithmetic/IR/ArithmeticOps.td"
include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td"
diff --git a/larq_compute_engine/mlir/transforms/optimize.cc b/larq_compute_engine/mlir/transforms/optimize.cc
index 4a29b9dd9..805f85f7c 100644
--- a/larq_compute_engine/mlir/transforms/optimize.cc
+++ b/larq_compute_engine/mlir/transforms/optimize.cc
@@ -6,7 +6,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Pass/Pass.h"
@@ -19,7 +19,7 @@ namespace TFL {
namespace {

// Optimize LCE operations in functions.
-struct OptimizeLCE : public PassWrapper<OptimizeLCE, FunctionPass> {
+struct OptimizeLCE : public PassWrapper<OptimizeLCE, OperationPass<mlir::func::FuncOp>> {
  llvm::StringRef getArgument() const final { return "tfl-optimize-lce"; }
  llvm::StringRef getDescription() const final {
    return "Optimize within the TensorFlow Lite dialect";
@@ -28,7 +28,7 @@ struct OptimizeLCE : public PassWrapper<OptimizeLCE, FunctionPass> {
  OptimizeLCE(const OptimizeLCE& pass) {}
  OptimizeLCE(const LCETarget target) { target_.setValue(target); }

-  void runOnFunction() override;
+  void runOnOperation() override;

 private:
  Option<LCETarget> target_{
@@ -265,10 +265,9 @@ namespace bitpack_activations {
#include "larq_compute_engine/mlir/transforms/generated_bitpack_activations.inc"
}

-void OptimizeLCE::runOnFunction() {
-  auto* ctx = &getContext();
-  OwningRewritePatternList patterns(ctx);
-  auto func = getFunction();
+void OptimizeLCE::runOnOperation() {
+  RewritePatternSet patterns(&getContext());
+  auto func = getOperation();

  if (target_ == LCETarget::ARM) {
    target_arm::populateWithGenerated(patterns);
diff --git a/larq_compute_engine/mlir/transforms/optimize_patterns_common.td b/larq_compute_engine/mlir/transforms/optimize_patterns_common.td
index c5d68a891..678eca1e3 100644
--- a/larq_compute_engine/mlir/transforms/optimize_patterns_common.td
+++ b/larq_compute_engine/mlir/transforms/optimize_patterns_common.td
@@ -1,4 +1,5 @@
-include "mlir/Dialect/StandardOps/IR/Ops.td"
+include "mlir/IR/PatternBase.td"
+include "mlir/Dialect/Func/IR/FuncOps.td"
include "mlir/Dialect/Arithmetic/IR/ArithmeticOps.td"
include "tensorflow/compiler/mlir/lite/ir/tfl_ops.td"
include "larq_compute_engine/mlir/ir/lce_ops.td"
diff --git a/larq_compute_engine/mlir/transforms/passes.h b/larq_compute_engine/mlir/transforms/passes.h
index 21f23e5b2..807ff35ab 100644
--- a/larq_compute_engine/mlir/transforms/passes.h
+++ b/larq_compute_engine/mlir/transforms/passes.h
@@ -1,6 +1,7 @@
#ifndef LARQ_COMPUTE_ENGINE_MLIR_PASSES_H_
#define LARQ_COMPUTE_ENGINE_MLIR_PASSES_H_

+#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/Pass.h"

enum LCETarget { ARM = 0, XCORE = 1 };

@@ -9,33 +10,35 @@
namespace mlir {
namespace TFL {

// Creates an instance of the TensorFlow dialect OpRemoval pass.
-std::unique_ptr<OperationPass<FuncOp>> CreateOpRemovalPass();
+std::unique_ptr<OperationPass<func::FuncOp>> CreateOpRemovalPass();

// Creates an instance of the TensorFlow dialect PrepareLCE pass.
-std::unique_ptr<OperationPass<FuncOp>> CreatePrepareLCEPass(LCETarget target);
+std::unique_ptr<OperationPass<func::FuncOp>> CreatePrepareLCEPass(
+    LCETarget target);

// Creates an instance of the TensorFlow dialect OptimizeLCE pass.
-std::unique_ptr<OperationPass<FuncOp>> CreateOptimizeLCEPass(LCETarget target);
+std::unique_ptr<OperationPass<func::FuncOp>> CreateOptimizeLCEPass(
+    LCETarget target);

// Creates an instance of the TensorFlow dialect BitpackWeightsLCE pass.
-std::unique_ptr<OperationPass<FuncOp>> CreateBitpackWeightsLCEPass();
+std::unique_ptr<OperationPass<func::FuncOp>> CreateBitpackWeightsLCEPass();

// Creates an instance of the TensorFlow Lite dialect QuantizeTFL pass.
-std::unique_ptr<OperationPass<FuncOp>> CreateLCEQuantizePass();
+std::unique_ptr<OperationPass<func::FuncOp>> CreateLCEQuantizePass();

// Creates an instance of LegalizeLCE pass.
-std::unique_ptr<OperationPass<FuncOp>> CreateLegalizeLCEPass();
+std::unique_ptr<OperationPass<func::FuncOp>> CreateLegalizeLCEPass();

// Creates an instance of the FusePadding pass.
-std::unique_ptr<OperationPass<FuncOp>> CreateFusePaddingPass();
+std::unique_ptr<OperationPass<func::FuncOp>> CreateFusePaddingPass();

// Creates an instance of TranslateToLCE pass.
-std::unique_ptr<OperationPass<FuncOp>> CreateTranslateToLCEPass();
+std::unique_ptr<OperationPass<func::FuncOp>> CreateTranslateToLCEPass();

} // namespace TFL

// Creates an instance of the TensorFlow dialect SetBatchSize pass
-std::unique_ptr<OperationPass<FuncOp>> CreateSetBatchSizePass();
+std::unique_ptr<OperationPass<func::FuncOp>> CreateSetBatchSizePass();

} // namespace mlir
diff --git a/larq_compute_engine/mlir/transforms/prepare_patterns_common.td b/larq_compute_engine/mlir/transforms/prepare_patterns_common.td
index fc51e5b6f..a7be4d4dd 100644
--- a/larq_compute_engine/mlir/transforms/prepare_patterns_common.td
+++ b/larq_compute_engine/mlir/transforms/prepare_patterns_common.td
@@ -1,4 +1,5 @@
-include "mlir/Dialect/StandardOps/IR/Ops.td"
+include "mlir/IR/PatternBase.td"
+include "mlir/Dialect/Func/IR/FuncOps.td"
include "mlir/Dialect/Arithmetic/IR/ArithmeticOps.td"
include "tensorflow/compiler/mlir/lite/ir/tfl_ops.td"
include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td"
@@ -6,6 +7,8 @@ include "larq_compute_engine/mlir/ir/lce_ops.td"
include "larq_compute_engine/mlir/transforms/op_removal_patterns.td"

class ConstantValue : AttrConstraint>;
+def CreateNoneValue : NativeCodeCall<
+  "$_builder.create<TFL::NoValueOp>($0.getLoc(), $_builder.getUnitAttr())">;

def CreateTFBroadcastToOp : NativeCodeCall<
    "$_builder.create<TF::BroadcastToOp>("
@@ -87,7 +90,7 @@ def GetScaleVector : NativeCodeCall<"GetScaleVector($0)">;
def GetNumChannels : NativeCodeCall<"GetNumChannels($_builder, $0)">;
def ValidFilterShape : Constraint>;
def IsDataFormatNHWC : ConstantAttr<TF_ConvnetDataFormatAttr, "\"NHWC\"">;
-def CreateNoneAttrValue : NativeCodeCall<"$_builder.getUnitAttr()">;
+

// All targets support this pattern with "VALID" padding, but only the "arm"
// target supports it with "SAME" padding.
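Aside: CreateNoneValue above is what replaces the old Standard-dialect `constant unit`. The "no output threshold" operand of lq.Bconv2d is now materialized as a `tfl.no_value` op, which is why the CHECK lines in the .mlir tests earlier in this diff change from `constant unit` to `"tfl.no_value"() {value} : () -> none`. A rough standalone sketch of what that NativeCodeCall expands to, written with the explicit-result-type builder (the shorter one-argument form in the TableGen snippet assumes a TFL::NoValueOp builder that infers the `none` type):

#include "mlir/IR/Builders.h"
#include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"

// Materialize the "no value" placeholder that stands in for an absent
// output-threshold tensor. The op carries a UnitAttr named `value` and its
// single result has type `none`, so it prints as
//   "tfl.no_value"() {value} : () -> none
mlir::Value CreateNonePlaceholder(mlir::OpBuilder& builder,
                                  mlir::Location loc) {
  // Sketch only: uses the generated builder that takes the result type
  // explicitly, which avoids relying on type inference being declared.
  return builder.create<mlir::TFL::NoValueOp>(loc, builder.getNoneType(),
                                              builder.getUnitAttr());
}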
@@ -110,7 +113,7 @@ class PrepareBConvPadValue0Pat : (Arith_ConstantOp ConstantAttr, "{3, 0, 1, 2}">)), (Arith_ConstantOp (GetScaleVector $filter)), (Arith_ConstantOp (GetConstantVector<"0.0f"> $filter)), - (ConstantOp (CreateNoneAttrValue)), + (CreateNoneValue $input), (GetNumChannels $dequantized_input), ExtractI32At<1>:$dilations, ExtractI32At<2>:$dilations, @@ -151,7 +154,7 @@ def : Pat<(TF_Conv2DOp:$output (Arith_ConstantOp ConstantAttr, "{3, 0, 1, 2}">)), (Arith_ConstantOp (GetScaleVector $filter)), (Arith_ConstantOp (GetConstantVector<"0.0f"> $filter)), - (ConstantOp (CreateNoneAttrValue)), + (CreateNoneValue $input), (GetNumChannels $dequantized_input), ExtractI32At<1>:$dilations, ExtractI32At<2>:$dilations, diff --git a/larq_compute_engine/mlir/transforms/prepare_tf.cc b/larq_compute_engine/mlir/transforms/prepare_tf.cc index 635868346..f7d966452 100644 --- a/larq_compute_engine/mlir/transforms/prepare_tf.cc +++ b/larq_compute_engine/mlir/transforms/prepare_tf.cc @@ -2,7 +2,7 @@ #include "larq_compute_engine/mlir/ir/lce_ops.h" #include "larq_compute_engine/mlir/transforms/padding.h" #include "larq_compute_engine/mlir/transforms/passes.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -19,14 +19,14 @@ namespace { using compute_engine::core::bitpacking_bitwidth; // Prepare LCE operations in functions for subsequent legalization. -struct PrepareLCE : public PassWrapper { +struct PrepareLCE : public PassWrapper> { llvm::StringRef getArgument() const final { return "tfl-prepare-lce"; } llvm::StringRef getDescription() const final { return "Inject LCE Ops"; } PrepareLCE() = default; PrepareLCE(const PrepareLCE& pass) {} PrepareLCE(const LCETarget target) { target_.setValue(target); } - void runOnFunction() override; + void runOnOperation() override; void getDependentDialects(DialectRegistry& registry) const override { registry.insert(); } @@ -169,15 +169,15 @@ namespace target_other { #include "larq_compute_engine/mlir/transforms/generated_prepare_target_other.inc" } -void PrepareLCE::runOnFunction() { +void PrepareLCE::runOnOperation() { auto* ctx = &getContext(); - OwningRewritePatternList patterns(ctx); - auto func = getFunction(); + RewritePatternSet patterns(ctx); + auto func = getOperation(); // This pattern will try to identify and optimize for dilated convolution. // e.g. Patterns like "SpaceToBatchND -> Conv2D -> BatchToSpaceND" will be // replaced with a single Conv op with dilation parameter. - patterns.insert>(ctx); + patterns.add>(ctx); if (target_ == LCETarget::ARM) { target_arm::populateWithGenerated(patterns); diff --git a/larq_compute_engine/mlir/transforms/quantize.cc b/larq_compute_engine/mlir/transforms/quantize.cc index 9f8d3e11a..178cae2e5 100644 --- a/larq_compute_engine/mlir/transforms/quantize.cc +++ b/larq_compute_engine/mlir/transforms/quantize.cc @@ -15,19 +15,20 @@ namespace TFL { namespace { // Applies quantization on the model in TFL dialect. 
-struct LCEQuantizePass : public PassWrapper { +struct LCEQuantizePass + : public PassWrapper> { llvm::StringRef getArgument() const final { return "lce-quantize"; } llvm::StringRef getDescription() const final { return "Apply hybrid quantization on models in TensorFlow Lite dialect"; } - void runOnFunction() override; + void runOnOperation() override; }; #include "larq_compute_engine/mlir/transforms/generated_quantize.inc" -void LCEQuantizePass::runOnFunction() { - OwningRewritePatternList patterns(&getContext()); - auto func = getFunction(); +void LCEQuantizePass::runOnOperation() { + RewritePatternSet patterns(&getContext()); + auto func = getOperation(); TFL::populateWithGenerated(patterns); (void)applyPatternsAndFoldGreedily(func, std::move(patterns)); } diff --git a/larq_compute_engine/mlir/transforms/quantize_patterns.td b/larq_compute_engine/mlir/transforms/quantize_patterns.td index c6e239f42..553734fdb 100644 --- a/larq_compute_engine/mlir/transforms/quantize_patterns.td +++ b/larq_compute_engine/mlir/transforms/quantize_patterns.td @@ -1,3 +1,4 @@ +include "mlir/IR/PatternBase.td" include "tensorflow/compiler/mlir/lite/ir/tfl_ops.td" include "larq_compute_engine/mlir/ir/lce_ops.td" diff --git a/larq_compute_engine/mlir/transforms/set_batch_size.cc b/larq_compute_engine/mlir/transforms/set_batch_size.cc index 7ac56f068..195cb4895 100644 --- a/larq_compute_engine/mlir/transforms/set_batch_size.cc +++ b/larq_compute_engine/mlir/transforms/set_batch_size.cc @@ -1,3 +1,4 @@ +#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Pass/Pass.h" // This pass will set the batch dimension of all inputs of the outermost @@ -22,17 +23,18 @@ mlir::Type SetBatchSize(mlir::Type type) { return nullptr; } -struct SetBatchSizePass : public PassWrapper { +struct SetBatchSizePass + : public PassWrapper> { llvm::StringRef getArgument() const final { return "mlir-setbatchsize"; } llvm::StringRef getDescription() const final { return "Set batch size to 1"; } - void runOnFunction() override { - FuncOp func = getFunction(); + void runOnOperation() override { + FuncOp func = getOperation(); // We have to edit both the function signature (mlir::Type) *and* the // function arguments (mlir::Value) // mlir::FunctionType is a TableGen-autogenerated MLIR type - mlir::FunctionType signature = func.getType(); + mlir::FunctionType signature = func.getFunctionType(); // Create a mutable copy of the input types, since getInputs returns an // immutable llvm::ArrayRef @@ -46,7 +48,7 @@ struct SetBatchSizePass : public PassWrapper { auto signature_new = mlir::FunctionType::get( signature.getContext(), signature_inputs, signature.getResults()); // Set the new signature - func.typeAttr(mlir::TypeAttr::get(signature_new)); + func.setFunctionTypeAttr(mlir::TypeAttr::get(signature_new)); // Now apply the same change to the mlir::Value objects for (mlir::BlockArgument arg : func.getArguments()) { diff --git a/larq_compute_engine/mlir/transforms/translate_tflite.cc b/larq_compute_engine/mlir/transforms/translate_tflite.cc index a3efd4531..c2230aec5 100644 --- a/larq_compute_engine/mlir/transforms/translate_tflite.cc +++ b/larq_compute_engine/mlir/transforms/translate_tflite.cc @@ -10,7 +10,8 @@ namespace TFL { namespace { -struct TranslateToLCE : public PassWrapper { +struct TranslateToLCE + : public PassWrapper> { llvm::StringRef getArgument() const final { return "lce-translate-tfl"; } llvm::StringRef getDescription() const final { return "Translate TFL custom ops to LCE ops"; @@ -18,7 +19,7 @@ struct TranslateToLCE : 
public PassWrapper { void getDependentDialects(DialectRegistry& registry) const override { registry.insert(); } - void runOnFunction() override; + void runOnOperation() override; }; struct TranslateToLCEPattern : public OpRewritePattern { @@ -65,11 +66,11 @@ struct TranslateToLCEPattern : public OpRewritePattern { } }; -void TranslateToLCE::runOnFunction() { - OwningRewritePatternList patterns(&getContext()); +void TranslateToLCE::runOnOperation() { + RewritePatternSet patterns(&getContext()); auto* ctx = &getContext(); - auto func = getFunction(); - patterns.insert(ctx); + auto func = getOperation(); + patterns.add(ctx); (void)applyPatternsAndFoldGreedily(func, std::move(patterns)); } diff --git a/larq_compute_engine/tflite/java/BUILD b/larq_compute_engine/tflite/java/BUILD index e067b4b44..9d2ac4aae 100644 --- a/larq_compute_engine/tflite/java/BUILD +++ b/larq_compute_engine/tflite/java/BUILD @@ -31,7 +31,7 @@ android_library( java_library( name = "tensorflowlite_java", - resource_jars = [ + runtime_deps = [ "@org_tensorflow//tensorflow/lite/java:tensorflowlite", ], ) diff --git a/third_party/tensorflow b/third_party/tensorflow index 3f878cff5..8a20d54a3 160000 --- a/third_party/tensorflow +++ b/third_party/tensorflow @@ -1 +1 @@ -Subproject commit 3f878cff5b698b82eea85db2b60d65a2e320850e +Subproject commit 8a20d54a3c1bfa38c03ea99a2ad3c1b0a45dfa95 diff --git a/third_party/tensorflow_patches/fix_armhf_xnnpack.patch b/third_party/tensorflow_patches/fix_armhf_xnnpack.patch index 1c382716f..ae01a916a 100644 --- a/third_party/tensorflow_patches/fix_armhf_xnnpack.patch +++ b/third_party/tensorflow_patches/fix_armhf_xnnpack.patch @@ -1,20 +1,8 @@ -diff --git a/.bazelrc b/.bazelrc -index b1d4eee905d..ada1395298f 100644 ---- a/.bazelrc -+++ b/.bazelrc -@@ -572,6 +572,7 @@ build:elinux_aarch64 --distinct_host_configuration=true - build:elinux_armhf --config=elinux - build:elinux_armhf --cpu=armhf - build:elinux_armhf --distinct_host_configuration=true -+build:elinux_armhf --copt -mfp16-format=ieee - # END TF REMOTE BUILD EXECUTION OPTIONS - - # Config-specific options should come above this line. diff --git a/tensorflow/lite/tools/evaluation/BUILD b/tensorflow/lite/tools/evaluation/BUILD -index 86bfa35bd1e..e22e9efd840 100644 +index 02ca40fad25cf..c2b12b7741dfc 100644 --- a/tensorflow/lite/tools/evaluation/BUILD +++ b/tensorflow/lite/tools/evaluation/BUILD -@@ -53,7 +53,6 @@ cc_library( +@@ -54,7 +54,6 @@ cc_library( ], "//conditions:default": [], }) + select({ diff --git a/third_party/toolchains/gcc7_manylinux2010-nvcc-cuda11/BUILD b/third_party/toolchains/gcc7_manylinux2010-nvcc-cuda11/BUILD deleted file mode 100644 index 44172e9f5..000000000 --- a/third_party/toolchains/gcc7_manylinux2010-nvcc-cuda11/BUILD +++ /dev/null @@ -1,118 +0,0 @@ -# This file is expanded from a template by cuda_configure.bzl -# Update cuda_configure.bzl#verify_build_defines when adding new variables. 
- -load(":cc_toolchain_config.bzl", "cc_toolchain_config") - -licenses(["restricted"]) - -package(default_visibility = ["//visibility:public"]) - -toolchain( - name = "toolchain-linux-x86_64", - exec_compatible_with = [ - "@bazel_tools//platforms:linux", - "@bazel_tools//platforms:x86_64", - ], - target_compatible_with = [ - "@bazel_tools//platforms:linux", - "@bazel_tools//platforms:x86_64", - ], - toolchain = ":cc-compiler-local", - toolchain_type = "@bazel_tools//tools/cpp:toolchain_type", -) - -cc_toolchain_suite( - name = "toolchain", - toolchains = { - "local|compiler": ":cc-compiler-local", - "darwin|compiler": ":cc-compiler-darwin", - "k8": ":cc-compiler-local", - "darwin": ":cc-compiler-darwin", - }, -) - -cc_toolchain( - name = "cc-compiler-local", - all_files = ":crosstool_wrapper_driver_is_not_gcc", - compiler_files = ":empty", - dwp_files = ":empty", - linker_files = ":crosstool_wrapper_driver_is_not_gcc", - objcopy_files = ":empty", - strip_files = ":empty", - # To support linker flags that need to go to the start of command line - # we need the toolchain to support parameter files. Parameter files are - # last on the command line and contain all shared libraries to link, so all - # regular options will be left of them. - supports_param_files = 1, - toolchain_config = ":cc-compiler-local-config", - toolchain_identifier = "local_linux", -) - -cc_toolchain_config( - name = "cc-compiler-local-config", - builtin_include_directories = [ - "/dt7/usr/include/c++/7", - "/dt7/usr/include/c++/7/x86_64-pc-linux-gnu", - "/dt7/usr/include/c++/7/backward", - "/dt7/usr/lib/gcc/x86_64-pc-linux-gnu/7/include", - "/dt7/usr/lib/gcc/x86_64-pc-linux-gnu/7/include-fixed", - "/dt7/usr/include", - "/usr/local/cuda-11.2/targets/x86_64-linux/include", - "/usr/local/cuda-11.2/include", - "/usr/local/cuda-11.2/extras/CUPTI/include", - "/usr/include", - ], - cpu = "local", - extra_no_canonical_prefixes_flags = ["-fno-canonical-system-headers"], - host_compiler_path = "clang/bin/crosstool_wrapper_driver_is_not_gcc", - host_compiler_prefix = "/usr/bin", - host_compiler_warnings = [], - host_unfiltered_compile_flags = [], - linker_bin_path = "/usr/bin", -) - -cc_toolchain( - name = "cc-compiler-darwin", - all_files = ":crosstool_wrapper_driver_is_not_gcc", - compiler_files = ":empty", - dwp_files = ":empty", - linker_files = ":crosstool_wrapper_driver_is_not_gcc", - objcopy_files = ":empty", - strip_files = ":empty", - supports_param_files = 0, - toolchain_config = ":cc-compiler-local-darwin", - toolchain_identifier = "local_darwin", -) - -cc_toolchain_config( - name = "cc-compiler-local-darwin", - builtin_include_directories = [ - "/dt7/usr/include/c++/7", - "/dt7/usr/include/c++/7/x86_64-pc-linux-gnu", - "/dt7/usr/include/c++/7/backward", - "/dt7/usr/lib/gcc/x86_64-pc-linux-gnu/7/include", - "/dt7/usr/lib/gcc/x86_64-pc-linux-gnu/7/include-fixed", - "/dt7/usr/include", - "/usr/local/cuda-11.2/targets/x86_64-linux/include", - "/usr/local/cuda-11.2/include", - "/usr/local/cuda-11.2/extras/CUPTI/include", - "/usr/include", - ], - cpu = "darwin", - extra_no_canonical_prefixes_flags = ["-fno-canonical-system-headers"], - host_compiler_path = "clang/bin/crosstool_wrapper_driver_is_not_gcc", - host_compiler_prefix = "/usr/bin", - host_compiler_warnings = [], - host_unfiltered_compile_flags = [], - linker_bin_path = "/usr/bin", -) - -filegroup( - name = "empty", - srcs = [], -) - -filegroup( - name = "crosstool_wrapper_driver_is_not_gcc", - srcs = ["clang/bin/crosstool_wrapper_driver_is_not_gcc"], -) diff --git 
a/third_party/toolchains/gcc7_manylinux2010-nvcc-cuda11/cc_toolchain_config.bzl b/third_party/toolchains/gcc7_manylinux2010-nvcc-cuda11/cc_toolchain_config.bzl deleted file mode 100644 index ba002b454..000000000 --- a/third_party/toolchains/gcc7_manylinux2010-nvcc-cuda11/cc_toolchain_config.bzl +++ /dev/null @@ -1,1493 +0,0 @@ -"""cc_toolchain_config rule for configuring CUDA toolchains on Linux, Mac, and Windows.""" - -load( - "@bazel_tools//tools/cpp:cc_toolchain_config_lib.bzl", - "action_config", - "env_entry", - "env_set", - "feature", - "feature_set", - "flag_group", - "flag_set", - "tool", - "tool_path", - "variable_with_value", -) -load( - "@bazel_tools//tools/build_defs/cc:action_names.bzl", - "ASSEMBLE_ACTION_NAME", - "CC_FLAGS_MAKE_VARIABLE_ACTION_NAME", - "CLIF_MATCH_ACTION_NAME", - "CPP_COMPILE_ACTION_NAME", - "CPP_HEADER_PARSING_ACTION_NAME", - "CPP_LINK_DYNAMIC_LIBRARY_ACTION_NAME", - "CPP_LINK_EXECUTABLE_ACTION_NAME", - "CPP_LINK_NODEPS_DYNAMIC_LIBRARY_ACTION_NAME", - "CPP_LINK_STATIC_LIBRARY_ACTION_NAME", - "CPP_MODULE_CODEGEN_ACTION_NAME", - "CPP_MODULE_COMPILE_ACTION_NAME", - "C_COMPILE_ACTION_NAME", - "LINKSTAMP_COMPILE_ACTION_NAME", - "LTO_BACKEND_ACTION_NAME", - "LTO_INDEXING_ACTION_NAME", - "OBJCPP_COMPILE_ACTION_NAME", - "OBJCPP_EXECUTABLE_ACTION_NAME", - "OBJC_ARCHIVE_ACTION_NAME", - "OBJC_COMPILE_ACTION_NAME", - "OBJC_EXECUTABLE_ACTION_NAME", - "OBJC_FULLY_LINK_ACTION_NAME", - "PREPROCESS_ASSEMBLE_ACTION_NAME", - "STRIP_ACTION_NAME", -) - -ACTION_NAMES = struct( - assemble = ASSEMBLE_ACTION_NAME, - c_compile = C_COMPILE_ACTION_NAME, - cc_flags_make_variable = CC_FLAGS_MAKE_VARIABLE_ACTION_NAME, - clif_match = CLIF_MATCH_ACTION_NAME, - cpp_compile = CPP_COMPILE_ACTION_NAME, - cpp_header_parsing = CPP_HEADER_PARSING_ACTION_NAME, - cpp_link_dynamic_library = CPP_LINK_DYNAMIC_LIBRARY_ACTION_NAME, - cpp_link_executable = CPP_LINK_EXECUTABLE_ACTION_NAME, - cpp_link_nodeps_dynamic_library = CPP_LINK_NODEPS_DYNAMIC_LIBRARY_ACTION_NAME, - cpp_link_static_library = CPP_LINK_STATIC_LIBRARY_ACTION_NAME, - cpp_module_codegen = CPP_MODULE_CODEGEN_ACTION_NAME, - cpp_module_compile = CPP_MODULE_COMPILE_ACTION_NAME, - ld_embed_data = "ld_embed_data", - linkstamp_compile = LINKSTAMP_COMPILE_ACTION_NAME, - lto_backend = LTO_BACKEND_ACTION_NAME, - lto_indexing = LTO_INDEXING_ACTION_NAME, - objc_archive = OBJC_ARCHIVE_ACTION_NAME, - objc_compile = OBJC_COMPILE_ACTION_NAME, - objc_executable = OBJC_EXECUTABLE_ACTION_NAME, - objc_fully_link = OBJC_FULLY_LINK_ACTION_NAME, - objcopy_embed_data = "objcopy_embed_data", - objcpp_compile = OBJCPP_COMPILE_ACTION_NAME, - objcpp_executable = OBJCPP_EXECUTABLE_ACTION_NAME, - preprocess_assemble = PREPROCESS_ASSEMBLE_ACTION_NAME, - strip = STRIP_ACTION_NAME, -) - -def _impl(ctx): - if (ctx.attr.cpu == "darwin"): - toolchain_identifier = "local_darwin" - elif (ctx.attr.cpu == "local"): - toolchain_identifier = "local_linux" - elif (ctx.attr.cpu == "x64_windows"): - toolchain_identifier = "local_windows" - else: - fail("Unreachable") - - host_system_name = "local" - - target_system_name = "local" - - if (ctx.attr.cpu == "darwin"): - target_cpu = "darwin" - elif (ctx.attr.cpu == "local"): - target_cpu = "local" - elif (ctx.attr.cpu == "x64_windows"): - target_cpu = "x64_windows" - else: - fail("Unreachable") - - if (ctx.attr.cpu == "local"): - target_libc = "local" - elif (ctx.attr.cpu == "darwin"): - target_libc = "macosx" - elif (ctx.attr.cpu == "x64_windows"): - target_libc = "msvcrt" - else: - fail("Unreachable") - - if (ctx.attr.cpu == "darwin" 
or - ctx.attr.cpu == "local"): - compiler = "compiler" - elif (ctx.attr.cpu == "x64_windows"): - compiler = "msvc-cl" - else: - fail("Unreachable") - - abi_version = "local" - - abi_libc_version = "local" - - cc_target_os = None - - builtin_sysroot = None - - all_link_actions = [ - ACTION_NAMES.cpp_link_executable, - ACTION_NAMES.cpp_link_dynamic_library, - ACTION_NAMES.cpp_link_nodeps_dynamic_library, - ] - - cpp_link_dynamic_library_action = action_config( - action_name = ACTION_NAMES.cpp_link_dynamic_library, - implies = [ - "nologo", - "shared_flag", - "linkstamps", - "output_execpath_flags", - "input_param_flags", - "user_link_flags", - "linker_subsystem_flag", - "linker_param_file", - "msvc_env", - "no_stripping", - "has_configured_linker_path", - "def_file", - ], - tools = [tool(path = ctx.attr.msvc_link_path)], - ) - - cpp_link_nodeps_dynamic_library_action = action_config( - action_name = ACTION_NAMES.cpp_link_nodeps_dynamic_library, - implies = [ - "nologo", - "shared_flag", - "linkstamps", - "output_execpath_flags", - "input_param_flags", - "user_link_flags", - "linker_subsystem_flag", - "linker_param_file", - "msvc_env", - "no_stripping", - "has_configured_linker_path", - "def_file", - ], - tools = [tool(path = ctx.attr.msvc_link_path)], - ) - - cpp_link_static_library_action = action_config( - action_name = ACTION_NAMES.cpp_link_static_library, - implies = [ - "nologo", - "archiver_flags", - "input_param_flags", - "linker_param_file", - "msvc_env", - ], - tools = [tool(path = ctx.attr.msvc_lib_path)], - ) - - assemble_action = action_config( - action_name = ACTION_NAMES.assemble, - implies = [ - "compiler_input_flags", - "compiler_output_flags", - "nologo", - "msvc_env", - "sysroot", - ], - tools = [tool(path = ctx.attr.msvc_ml_path)], - ) - - preprocess_assemble_action = action_config( - action_name = ACTION_NAMES.preprocess_assemble, - implies = [ - "compiler_input_flags", - "compiler_output_flags", - "nologo", - "msvc_env", - "sysroot", - ], - tools = [tool(path = ctx.attr.msvc_ml_path)], - ) - - c_compile_action = action_config( - action_name = ACTION_NAMES.c_compile, - implies = [ - "compiler_input_flags", - "compiler_output_flags", - "nologo", - "msvc_env", - "parse_showincludes", - "user_compile_flags", - "sysroot", - "unfiltered_compile_flags", - ], - tools = [tool(path = ctx.attr.msvc_cl_path)], - ) - - cpp_compile_action = action_config( - action_name = ACTION_NAMES.cpp_compile, - implies = [ - "compiler_input_flags", - "compiler_output_flags", - "nologo", - "msvc_env", - "parse_showincludes", - "user_compile_flags", - "sysroot", - "unfiltered_compile_flags", - ], - tools = [tool(path = ctx.attr.msvc_cl_path)], - ) - - cpp_link_executable_action = action_config( - action_name = ACTION_NAMES.cpp_link_executable, - implies = [ - "nologo", - "linkstamps", - "output_execpath_flags", - "input_param_flags", - "user_link_flags", - "linker_subsystem_flag", - "linker_param_file", - "msvc_env", - "no_stripping", - ], - tools = [tool(path = ctx.attr.msvc_link_path)], - ) - - if (ctx.attr.cpu == "darwin" or - ctx.attr.cpu == "local"): - action_configs = [] - elif (ctx.attr.cpu == "x64_windows"): - action_configs = [ - assemble_action, - preprocess_assemble_action, - c_compile_action, - cpp_compile_action, - cpp_link_executable_action, - cpp_link_dynamic_library_action, - cpp_link_nodeps_dynamic_library_action, - cpp_link_static_library_action, - ] - else: - fail("Unreachable") - - no_windows_export_all_symbols_feature = feature(name = "no_windows_export_all_symbols") - - 
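The configuration being deleted here is built almost entirely from the feature/flag_set/flag_group vocabulary of cc_toolchain_config_lib: a feature names a unit that can be enabled or implied by other features, each flag_set binds its flag_groups to a list of build actions, and expand_if_available/expand_if_not_available guards make a group conditional on a build variable. A minimal standalone sketch of that shape, assuming the load() statements and ACTION_NAMES struct from the top of this file (the feature name "example_warnings" and its flags are hypothetical, not part of the deleted toolchain):

    # Hypothetical feature, shown only to illustrate the pattern used throughout this file.
    example_warnings_feature = feature(
        name = "example_warnings",
        enabled = True,
        flag_sets = [
            flag_set(
                # Bind the flags to compile actions only; link actions are untouched.
                actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile],
                flag_groups = [
                    # The guard expands the group only when the build variable is set,
                    # the same mechanism pic_feature below uses to pick -fPIC vs -fPIE.
                    flag_group(flags = ["-Wall"], expand_if_available = "source_file"),
                ],
            ),
        ],
    )
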
pic_feature = feature( - name = "pic", - enabled = True, - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], - flag_groups = [ - flag_group(flags = ["-fPIC"], expand_if_available = "pic"), - flag_group( - flags = ["-fPIE"], - expand_if_not_available = "pic", - ), - ], - ), - ], - ) - - preprocessor_defines_feature = feature( - name = "preprocessor_defines", - enabled = True, - flag_sets = [ - flag_set( - actions = [ - ACTION_NAMES.assemble, - ACTION_NAMES.preprocess_assemble, - ACTION_NAMES.c_compile, - ACTION_NAMES.cpp_compile, - ACTION_NAMES.cpp_header_parsing, - ACTION_NAMES.cpp_module_compile, - ], - flag_groups = [ - flag_group( - flags = ["/D%{preprocessor_defines}"], - iterate_over = "preprocessor_defines", - ), - ], - ), - ], - ) - - generate_pdb_file_feature = feature( - name = "generate_pdb_file", - requires = [ - feature_set(features = ["dbg"]), - feature_set(features = ["fastbuild"]), - ], - ) - - linkstamps_feature = feature( - name = "linkstamps", - flag_sets = [ - flag_set( - actions = all_link_actions, - flag_groups = [ - flag_group( - flags = ["%{linkstamp_paths}"], - iterate_over = "linkstamp_paths", - expand_if_available = "linkstamp_paths", - ), - ], - ), - ], - ) - - unfiltered_compile_flags_feature = feature( - name = "unfiltered_compile_flags", - flag_sets = ([ - flag_set( - actions = [ - ACTION_NAMES.preprocess_assemble, - ACTION_NAMES.c_compile, - ACTION_NAMES.cpp_compile, - ACTION_NAMES.cpp_header_parsing, - ACTION_NAMES.cpp_module_compile, - ACTION_NAMES.cpp_module_codegen, - ], - flag_groups = [ - flag_group( - flags = ctx.attr.host_unfiltered_compile_flags, - ), - ], - ), - ] if ctx.attr.host_unfiltered_compile_flags else []), - ) - - determinism_feature = feature( - name = "determinism", - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], - flag_groups = [ - flag_group( - flags = [ - "-Wno-builtin-macro-redefined", - "-D__DATE__=\"redacted\"", - "-D__TIMESTAMP__=\"redacted\"", - "-D__TIME__=\"redacted\"", - ], - ), - ], - ), - ], - ) - - nologo_feature = feature( - name = "nologo", - flag_sets = [ - flag_set( - actions = [ - ACTION_NAMES.c_compile, - ACTION_NAMES.cpp_compile, - ACTION_NAMES.cpp_module_compile, - ACTION_NAMES.cpp_module_codegen, - ACTION_NAMES.cpp_header_parsing, - ACTION_NAMES.assemble, - ACTION_NAMES.preprocess_assemble, - ACTION_NAMES.cpp_link_executable, - ACTION_NAMES.cpp_link_dynamic_library, - ACTION_NAMES.cpp_link_nodeps_dynamic_library, - ACTION_NAMES.cpp_link_static_library, - ], - flag_groups = [flag_group(flags = ["/nologo"])], - ), - ], - ) - - supports_pic_feature = feature(name = "supports_pic", enabled = True) - - output_execpath_flags_feature = feature( - name = "output_execpath_flags", - flag_sets = [ - flag_set( - actions = all_link_actions, - flag_groups = [ - flag_group( - flags = ["/OUT:%{output_execpath}"], - expand_if_available = "output_execpath", - ), - ], - ), - ], - ) - - default_link_flags_feature = feature( - name = "default_link_flags", - enabled = True, - flag_sets = [ - flag_set( - actions = all_link_actions, - flag_groups = [flag_group(flags = ["/MACHINE:X64"])], - ), - ], - ) - - if (ctx.attr.cpu == "local"): - hardening_feature = feature( - name = "hardening", - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], - flag_groups = [ - flag_group( - flags = [ - "-U_FORTIFY_SOURCE", - "-D_FORTIFY_SOURCE=1", - "-fstack-protector", - ], - ), - ], - ), - flag_set( - actions = [ - 
ACTION_NAMES.cpp_link_dynamic_library, - ACTION_NAMES.cpp_link_nodeps_dynamic_library, - ], - flag_groups = [flag_group(flags = ["-Wl,-z,relro,-z,now"])], - ), - flag_set( - actions = [ACTION_NAMES.cpp_link_executable], - flag_groups = [flag_group(flags = ["-pie", "-Wl,-z,relro,-z,now"])], - ), - ], - ) - elif (ctx.attr.cpu == "darwin"): - hardening_feature = feature( - name = "hardening", - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], - flag_groups = [ - flag_group( - flags = [ - "-U_FORTIFY_SOURCE", - "-D_FORTIFY_SOURCE=1", - "-fstack-protector", - ], - ), - ], - ), - flag_set( - actions = [ACTION_NAMES.cpp_link_executable], - flag_groups = [flag_group(flags = ["-pie"])], - ), - ], - ) - else: - hardening_feature = None - - supports_dynamic_linker_feature = feature(name = "supports_dynamic_linker", enabled = True) - - targets_windows_feature = feature( - name = "targets_windows", - enabled = True, - implies = ["copy_dynamic_libraries_to_binary"], - ) - - msvc_env_feature = feature( - name = "msvc_env", - env_sets = [ - env_set( - actions = [ - ACTION_NAMES.c_compile, - ACTION_NAMES.cpp_compile, - ACTION_NAMES.cpp_module_compile, - ACTION_NAMES.cpp_module_codegen, - ACTION_NAMES.cpp_header_parsing, - ACTION_NAMES.assemble, - ACTION_NAMES.preprocess_assemble, - ACTION_NAMES.cpp_link_executable, - ACTION_NAMES.cpp_link_dynamic_library, - ACTION_NAMES.cpp_link_nodeps_dynamic_library, - ACTION_NAMES.cpp_link_static_library, - ], - env_entries = [ - env_entry(key = "PATH", value = ctx.attr.msvc_env_path), - env_entry( - key = "INCLUDE", - value = ctx.attr.msvc_env_include, - ), - env_entry(key = "LIB", value = ctx.attr.msvc_env_lib), - env_entry(key = "TMP", value = ctx.attr.msvc_env_tmp), - env_entry(key = "TEMP", value = ctx.attr.msvc_env_tmp), - ], - ), - ], - ) - - linker_subsystem_flag_feature = feature( - name = "linker_subsystem_flag", - flag_sets = [ - flag_set( - actions = all_link_actions, - flag_groups = [flag_group(flags = ["/SUBSYSTEM:CONSOLE"])], - ), - ], - ) - - dynamic_link_msvcrt_no_debug_feature = feature( - name = "dynamic_link_msvcrt_no_debug", - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], - flag_groups = [flag_group(flags = ["/MD"])], - ), - flag_set( - actions = all_link_actions, - flag_groups = [flag_group(flags = ["/DEFAULTLIB:msvcrt.lib"])], - ), - ], - requires = [ - feature_set(features = ["fastbuild"]), - feature_set(features = ["opt"]), - ], - ) - - warnings_feature = feature( - name = "warnings", - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], - flag_groups = [ - flag_group( - flags = ["-Wall"] + ctx.attr.host_compiler_warnings, - ), - ], - ), - ], - ) - - dynamic_link_msvcrt_debug_feature = feature( - name = "dynamic_link_msvcrt_debug", - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], - flag_groups = [flag_group(flags = ["/MDd"])], - ), - flag_set( - actions = all_link_actions, - flag_groups = [flag_group(flags = ["/DEFAULTLIB:msvcrtd.lib"])], - ), - ], - requires = [feature_set(features = ["dbg"])], - ) - - compiler_output_flags_feature = feature( - name = "compiler_output_flags", - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.assemble], - flag_groups = [ - flag_group( - flag_groups = [ - flag_group( - flags = ["/Fo%{output_file}", "/Zi"], - expand_if_not_available = "output_preprocess_file", - ), - ], - expand_if_available = "output_file", - expand_if_not_available = 
"output_assembly_file", - ), - ], - ), - flag_set( - actions = [ - ACTION_NAMES.preprocess_assemble, - ACTION_NAMES.c_compile, - ACTION_NAMES.cpp_compile, - ACTION_NAMES.cpp_header_parsing, - ACTION_NAMES.cpp_module_compile, - ACTION_NAMES.cpp_module_codegen, - ], - flag_groups = [ - flag_group( - flag_groups = [ - flag_group( - flags = ["/Fo%{output_file}"], - expand_if_not_available = "output_preprocess_file", - ), - ], - expand_if_available = "output_file", - expand_if_not_available = "output_assembly_file", - ), - flag_group( - flag_groups = [ - flag_group( - flags = ["/Fa%{output_file}"], - expand_if_available = "output_assembly_file", - ), - ], - expand_if_available = "output_file", - ), - flag_group( - flag_groups = [ - flag_group( - flags = ["/P", "/Fi%{output_file}"], - expand_if_available = "output_preprocess_file", - ), - ], - expand_if_available = "output_file", - ), - ], - ), - ], - ) - - default_compile_flags_feature = feature( - name = "default_compile_flags", - enabled = True, - flag_sets = [ - flag_set( - actions = [ - ACTION_NAMES.assemble, - ACTION_NAMES.preprocess_assemble, - ACTION_NAMES.linkstamp_compile, - ACTION_NAMES.c_compile, - ACTION_NAMES.cpp_compile, - ACTION_NAMES.cpp_header_parsing, - ACTION_NAMES.cpp_module_compile, - ACTION_NAMES.cpp_module_codegen, - ACTION_NAMES.lto_backend, - ACTION_NAMES.clif_match, - ], - flag_groups = [ - flag_group( - flags = [ - "/DCOMPILER_MSVC", - "/DNOMINMAX", - "/D_WIN32_WINNT=0x0600", - "/D_CRT_SECURE_NO_DEPRECATE", - "/D_CRT_SECURE_NO_WARNINGS", - "/D_SILENCE_STDEXT_HASH_DEPRECATION_WARNINGS", - "/bigobj", - "/Zm500", - "/J", - "/Gy", - "/GF", - "/EHsc", - "/wd4351", - "/wd4291", - "/wd4250", - "/wd4996", - ], - ), - ], - ), - ], - ) - - static_link_msvcrt_debug_feature = feature( - name = "static_link_msvcrt_debug", - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], - flag_groups = [flag_group(flags = ["/MTd"])], - ), - flag_set( - actions = all_link_actions, - flag_groups = [flag_group(flags = ["/DEFAULTLIB:libcmtd.lib"])], - ), - ], - requires = [feature_set(features = ["dbg"])], - ) - - static_link_msvcrt_feature = feature(name = "static_link_msvcrt") - - if (ctx.attr.cpu == "darwin" or - ctx.attr.cpu == "local"): - dbg_feature = feature( - name = "dbg", - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], - flag_groups = [flag_group(flags = ["-g"])], - ), - ], - implies = ["common"], - ) - elif (ctx.attr.cpu == "x64_windows"): - dbg_feature = feature( - name = "dbg", - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], - flag_groups = [flag_group(flags = ["/Od", "/Z7", "/DDEBUG"])], - ), - flag_set( - actions = all_link_actions, - flag_groups = [flag_group(flags = ["/DEBUG:FULL", "/INCREMENTAL:NO"])], - ), - ], - implies = ["generate_pdb_file"], - ) - else: - dbg_feature = None - - undefined_dynamic_feature = feature( - name = "undefined-dynamic", - flag_sets = [ - flag_set( - actions = [ - ACTION_NAMES.cpp_link_dynamic_library, - ACTION_NAMES.cpp_link_nodeps_dynamic_library, - ACTION_NAMES.cpp_link_executable, - ], - flag_groups = [flag_group(flags = ["-undefined", "dynamic_lookup"])], - ), - ], - ) - - parse_showincludes_feature = feature( - name = "parse_showincludes", - flag_sets = [ - flag_set( - actions = [ - ACTION_NAMES.preprocess_assemble, - ACTION_NAMES.c_compile, - ACTION_NAMES.cpp_compile, - ACTION_NAMES.cpp_module_compile, - ACTION_NAMES.cpp_header_parsing, - ], - flag_groups = 
[flag_group(flags = ["/showIncludes"])], - ), - ], - ) - - linker_param_file_feature = feature( - name = "linker_param_file", - flag_sets = [ - flag_set( - actions = all_link_actions + - [ACTION_NAMES.cpp_link_static_library], - flag_groups = [ - flag_group( - flags = ["@%{linker_param_file}"], - expand_if_available = "linker_param_file", - ), - ], - ), - ], - ) - - static_link_msvcrt_no_debug_feature = feature( - name = "static_link_msvcrt_no_debug", - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], - flag_groups = [flag_group(flags = ["/MT"])], - ), - flag_set( - actions = all_link_actions, - flag_groups = [flag_group(flags = ["/DEFAULTLIB:libcmt.lib"])], - ), - ], - requires = [ - feature_set(features = ["fastbuild"]), - feature_set(features = ["opt"]), - ], - ) - - supports_interface_shared_libraries_feature = feature( - name = "supports_interface_shared_libraries", - enabled = True, - ) - - disable_assertions_feature = feature( - name = "disable-assertions", - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], - flag_groups = [flag_group(flags = ["-DNDEBUG"])], - ), - ], - ) - - if (ctx.attr.cpu == "x64_windows"): - fastbuild_feature = feature( - name = "fastbuild", - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], - flag_groups = [flag_group(flags = ["/Od", "/Z7", "/DDEBUG"])], - ), - flag_set( - actions = all_link_actions, - flag_groups = [ - flag_group(flags = ["/DEBUG:FASTLINK", "/INCREMENTAL:NO"]), - ], - ), - ], - implies = ["generate_pdb_file"], - ) - elif (ctx.attr.cpu == "darwin" or - ctx.attr.cpu == "local"): - fastbuild_feature = feature(name = "fastbuild", implies = ["common"]) - else: - fastbuild_feature = None - - user_compile_flags_feature = feature( - name = "user_compile_flags", - flag_sets = [ - flag_set( - actions = [ - ACTION_NAMES.preprocess_assemble, - ACTION_NAMES.c_compile, - ACTION_NAMES.cpp_compile, - ACTION_NAMES.cpp_header_parsing, - ACTION_NAMES.cpp_module_compile, - ACTION_NAMES.cpp_module_codegen, - ], - flag_groups = [ - flag_group( - flags = ["%{user_compile_flags}"], - iterate_over = "user_compile_flags", - expand_if_available = "user_compile_flags", - ), - ], - ), - ], - ) - - compiler_input_flags_feature = feature( - name = "compiler_input_flags", - flag_sets = [ - flag_set( - actions = [ - ACTION_NAMES.assemble, - ACTION_NAMES.preprocess_assemble, - ACTION_NAMES.c_compile, - ACTION_NAMES.cpp_compile, - ACTION_NAMES.cpp_header_parsing, - ACTION_NAMES.cpp_module_compile, - ACTION_NAMES.cpp_module_codegen, - ], - flag_groups = [ - flag_group( - flags = ["/c", "%{source_file}"], - expand_if_available = "source_file", - ), - ], - ), - ], - ) - - no_legacy_features_feature = feature(name = "no_legacy_features") - - archiver_flags_feature = feature( - name = "archiver_flags", - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.cpp_link_static_library], - flag_groups = [ - flag_group( - flags = ["/OUT:%{output_execpath}"], - expand_if_available = "output_execpath", - ), - ], - ), - ], - ) - - redirector_feature = feature( - name = "redirector", - enabled = True, - flag_sets = [ - flag_set( - actions = [ - ACTION_NAMES.c_compile, - ACTION_NAMES.cpp_compile, - ACTION_NAMES.cpp_module_compile, - ACTION_NAMES.cpp_module_codegen, - ACTION_NAMES.cpp_header_parsing, - ACTION_NAMES.assemble, - ACTION_NAMES.preprocess_assemble, - ], - flag_groups = [ - flag_group( - flags = [ - "-B", - 
"external/local_config_cuda/crosstool/windows/msvc_wrapper_for_nvcc.py", - ], - ), - ], - ), - ], - ) - - linker_bin_path_feature = feature( - name = "linker-bin-path", - flag_sets = [ - flag_set( - actions = all_link_actions, - flag_groups = [flag_group(flags = ["-B" + ctx.attr.linker_bin_path])], - ), - ], - ) - - if (ctx.attr.cpu == "local"): - opt_feature = feature( - name = "opt", - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], - flag_groups = [ - flag_group( - flags = ["-g0", "-O2", "-ffunction-sections", "-fdata-sections"], - ), - ], - ), - flag_set( - actions = [ - ACTION_NAMES.cpp_link_dynamic_library, - ACTION_NAMES.cpp_link_nodeps_dynamic_library, - ACTION_NAMES.cpp_link_executable, - ], - flag_groups = [flag_group(flags = ["-Wl,--gc-sections"])], - ), - ], - implies = ["common", "disable-assertions"], - ) - elif (ctx.attr.cpu == "darwin"): - opt_feature = feature( - name = "opt", - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], - flag_groups = [ - flag_group( - flags = ["-g0", "-O2", "-ffunction-sections", "-fdata-sections"], - ), - ], - ), - ], - implies = ["common", "disable-assertions"], - ) - elif (ctx.attr.cpu == "x64_windows"): - opt_feature = feature( - name = "opt", - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], - flag_groups = [flag_group(flags = ["/O2", "/DNDEBUG"])], - ), - ], - ) - else: - opt_feature = None - - include_paths_feature = feature( - name = "include_paths", - enabled = True, - flag_sets = [ - flag_set( - actions = [ - ACTION_NAMES.assemble, - ACTION_NAMES.preprocess_assemble, - ACTION_NAMES.c_compile, - ACTION_NAMES.cpp_compile, - ACTION_NAMES.cpp_header_parsing, - ACTION_NAMES.cpp_module_compile, - ], - flag_groups = [ - flag_group( - flags = ["/I%{quote_include_paths}"], - iterate_over = "quote_include_paths", - ), - flag_group( - flags = ["/I%{include_paths}"], - iterate_over = "include_paths", - ), - flag_group( - flags = ["/I%{system_include_paths}"], - iterate_over = "system_include_paths", - ), - ], - ), - ], - ) - - shared_flag_feature = feature( - name = "shared_flag", - flag_sets = [ - flag_set( - actions = [ - ACTION_NAMES.cpp_link_dynamic_library, - ACTION_NAMES.cpp_link_nodeps_dynamic_library, - ], - flag_groups = [flag_group(flags = ["/DLL"])], - ), - ], - ) - - windows_export_all_symbols_feature = feature(name = "windows_export_all_symbols") - - frame_pointer_feature = feature( - name = "frame-pointer", - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], - flag_groups = [flag_group(flags = ["-fno-omit-frame-pointer"])], - ), - ], - ) - - build_id_feature = feature( - name = "build-id", - flag_sets = [ - flag_set( - actions = all_link_actions, - flag_groups = [ - flag_group( - flags = ["-Wl,--build-id=md5", "-Wl,--hash-style=gnu"], - ), - ], - ), - ], - ) - - sysroot_feature = feature( - name = "sysroot", - flag_sets = [ - flag_set( - actions = [ - ACTION_NAMES.assemble, - ACTION_NAMES.preprocess_assemble, - ACTION_NAMES.c_compile, - ACTION_NAMES.cpp_compile, - ACTION_NAMES.cpp_header_parsing, - ACTION_NAMES.cpp_module_compile, - ACTION_NAMES.cpp_module_codegen, - ACTION_NAMES.cpp_link_executable, - ACTION_NAMES.cpp_link_dynamic_library, - ACTION_NAMES.cpp_link_nodeps_dynamic_library, - ], - flag_groups = [ - flag_group( - flags = ["--sysroot=%{sysroot}"], - iterate_over = "sysroot", - expand_if_available = "sysroot", - ), - ], - ), - ], - ) - - def_file_feature = 
feature( - name = "def_file", - flag_sets = [ - flag_set( - actions = all_link_actions, - flag_groups = [ - flag_group( - flags = ["/DEF:%{def_file_path}", "/ignore:4070"], - expand_if_available = "def_file_path", - ), - ], - ), - ], - ) - - if (ctx.attr.cpu == "darwin"): - stdlib_feature = feature( - name = "stdlib", - flag_sets = [ - flag_set( - actions = all_link_actions, - flag_groups = [flag_group(flags = ["-lc++"])], - ), - ], - ) - elif (ctx.attr.cpu == "local"): - stdlib_feature = feature( - name = "stdlib", - flag_sets = [ - flag_set( - actions = all_link_actions, - flag_groups = [flag_group(flags = ["-lstdc++"])], - ), - ], - ) - else: - stdlib_feature = None - - no_stripping_feature = feature(name = "no_stripping") - - alwayslink_feature = feature( - name = "alwayslink", - flag_sets = [ - flag_set( - actions = [ - ACTION_NAMES.cpp_link_dynamic_library, - ACTION_NAMES.cpp_link_nodeps_dynamic_library, - ACTION_NAMES.cpp_link_executable, - ], - flag_groups = [flag_group(flags = ["-Wl,-no-as-needed"])], - ), - ], - ) - - input_param_flags_feature = feature( - name = "input_param_flags", - flag_sets = [ - flag_set( - actions = [ - ACTION_NAMES.cpp_link_dynamic_library, - ACTION_NAMES.cpp_link_nodeps_dynamic_library, - ], - flag_groups = [ - flag_group( - flags = ["/IMPLIB:%{interface_library_output_path}"], - expand_if_available = "interface_library_output_path", - ), - ], - ), - flag_set( - actions = all_link_actions + - [ACTION_NAMES.cpp_link_static_library], - flag_groups = [ - flag_group( - iterate_over = "libraries_to_link", - flag_groups = [ - flag_group( - iterate_over = "libraries_to_link.object_files", - flag_groups = [flag_group(flags = ["%{libraries_to_link.object_files}"])], - expand_if_equal = variable_with_value( - name = "libraries_to_link.type", - value = "object_file_group", - ), - ), - flag_group( - flag_groups = [flag_group(flags = ["%{libraries_to_link.name}"])], - expand_if_equal = variable_with_value( - name = "libraries_to_link.type", - value = "object_file", - ), - ), - flag_group( - flag_groups = [flag_group(flags = ["%{libraries_to_link.name}"])], - expand_if_equal = variable_with_value( - name = "libraries_to_link.type", - value = "interface_library", - ), - ), - flag_group( - flag_groups = [ - flag_group( - flags = ["%{libraries_to_link.name}"], - expand_if_false = "libraries_to_link.is_whole_archive", - ), - flag_group( - flags = ["/WHOLEARCHIVE:%{libraries_to_link.name}"], - expand_if_true = "libraries_to_link.is_whole_archive", - ), - ], - expand_if_equal = variable_with_value( - name = "libraries_to_link.type", - value = "static_library", - ), - ), - ], - expand_if_available = "libraries_to_link", - ), - ], - ), - ], - ) - - if (ctx.attr.cpu == "local"): - no_canonical_prefixes_feature = feature( - name = "no-canonical-prefixes", - flag_sets = [ - flag_set( - actions = [ - ACTION_NAMES.c_compile, - ACTION_NAMES.cpp_compile, - ACTION_NAMES.cpp_link_executable, - ACTION_NAMES.cpp_link_dynamic_library, - ACTION_NAMES.cpp_link_nodeps_dynamic_library, - ], - flag_groups = [ - flag_group( - flags = [ - "-no-canonical-prefixes", - ] + ctx.attr.extra_no_canonical_prefixes_flags, - ), - ], - ), - ], - ) - elif (ctx.attr.cpu == "darwin"): - no_canonical_prefixes_feature = feature( - name = "no-canonical-prefixes", - flag_sets = [ - flag_set( - actions = [ - ACTION_NAMES.c_compile, - ACTION_NAMES.cpp_compile, - ACTION_NAMES.cpp_link_executable, - ACTION_NAMES.cpp_link_dynamic_library, - ACTION_NAMES.cpp_link_nodeps_dynamic_library, - ], - flag_groups = 
[flag_group(flags = ["-no-canonical-prefixes"])], - ), - ], - ) - else: - no_canonical_prefixes_feature = None - - has_configured_linker_path_feature = feature(name = "has_configured_linker_path") - - copy_dynamic_libraries_to_binary_feature = feature(name = "copy_dynamic_libraries_to_binary") - - user_link_flags_feature = feature( - name = "user_link_flags", - flag_sets = [ - flag_set( - actions = all_link_actions, - flag_groups = [ - flag_group( - flags = ["%{user_link_flags}"], - iterate_over = "user_link_flags", - expand_if_available = "user_link_flags", - ), - ], - ), - ], - ) - - cpp11_feature = feature( - name = "c++11", - flag_sets = [ - flag_set( - actions = [ACTION_NAMES.cpp_compile], - flag_groups = [flag_group(flags = ["-std=c++11"])], - ), - ], - ) - - if (ctx.attr.cpu == "local"): - common_feature = feature( - name = "common", - implies = [ - "stdlib", - "c++11", - "determinism", - "alwayslink", - "hardening", - "warnings", - "frame-pointer", - "build-id", - "no-canonical-prefixes", - "linker-bin-path", - ], - ) - elif (ctx.attr.cpu == "darwin"): - common_feature = feature( - name = "common", - implies = [ - "stdlib", - "c++11", - "determinism", - "hardening", - "warnings", - "frame-pointer", - "no-canonical-prefixes", - "linker-bin-path", - "undefined-dynamic", - ], - ) - else: - common_feature = None - - if (ctx.attr.cpu == "local"): - features = [ - cpp11_feature, - stdlib_feature, - determinism_feature, - alwayslink_feature, - pic_feature, - hardening_feature, - warnings_feature, - frame_pointer_feature, - build_id_feature, - no_canonical_prefixes_feature, - disable_assertions_feature, - linker_bin_path_feature, - common_feature, - opt_feature, - fastbuild_feature, - dbg_feature, - supports_dynamic_linker_feature, - supports_pic_feature, - ] - elif (ctx.attr.cpu == "darwin"): - features = [ - cpp11_feature, - stdlib_feature, - determinism_feature, - pic_feature, - hardening_feature, - warnings_feature, - frame_pointer_feature, - no_canonical_prefixes_feature, - disable_assertions_feature, - linker_bin_path_feature, - undefined_dynamic_feature, - common_feature, - opt_feature, - fastbuild_feature, - dbg_feature, - supports_dynamic_linker_feature, - supports_pic_feature, - ] - elif (ctx.attr.cpu == "x64_windows"): - features = [ - no_legacy_features_feature, - redirector_feature, - nologo_feature, - has_configured_linker_path_feature, - no_stripping_feature, - targets_windows_feature, - copy_dynamic_libraries_to_binary_feature, - default_compile_flags_feature, - msvc_env_feature, - include_paths_feature, - preprocessor_defines_feature, - parse_showincludes_feature, - generate_pdb_file_feature, - shared_flag_feature, - linkstamps_feature, - output_execpath_flags_feature, - archiver_flags_feature, - input_param_flags_feature, - linker_subsystem_flag_feature, - user_link_flags_feature, - default_link_flags_feature, - linker_param_file_feature, - static_link_msvcrt_feature, - static_link_msvcrt_no_debug_feature, - dynamic_link_msvcrt_no_debug_feature, - static_link_msvcrt_debug_feature, - dynamic_link_msvcrt_debug_feature, - dbg_feature, - fastbuild_feature, - opt_feature, - user_compile_flags_feature, - sysroot_feature, - unfiltered_compile_flags_feature, - compiler_output_flags_feature, - compiler_input_flags_feature, - def_file_feature, - windows_export_all_symbols_feature, - no_windows_export_all_symbols_feature, - supports_dynamic_linker_feature, - supports_interface_shared_libraries_feature, - ] - else: - fail("Unreachable") - - cxx_builtin_include_directories = 
ctx.attr.builtin_include_directories - - if (ctx.attr.cpu == "x64_windows"): - tool_paths = [ - tool_path(name = "ar", path = ctx.attr.msvc_lib_path), - tool_path(name = "ml", path = ctx.attr.msvc_ml_path), - tool_path(name = "cpp", path = ctx.attr.msvc_cl_path), - tool_path(name = "gcc", path = ctx.attr.msvc_cl_path), - tool_path(name = "gcov", path = "wrapper/bin/msvc_nop.bat"), - tool_path(name = "ld", path = ctx.attr.msvc_link_path), - tool_path(name = "nm", path = "wrapper/bin/msvc_nop.bat"), - tool_path( - name = "objcopy", - path = "wrapper/bin/msvc_nop.bat", - ), - tool_path( - name = "objdump", - path = "wrapper/bin/msvc_nop.bat", - ), - tool_path( - name = "strip", - path = "wrapper/bin/msvc_nop.bat", - ), - ] - elif (ctx.attr.cpu == "local"): - tool_paths = [ - tool_path(name = "gcc", path = ctx.attr.host_compiler_path), - tool_path(name = "ar", path = ctx.attr.host_compiler_prefix + "/ar"), - tool_path(name = "compat-ld", path = ctx.attr.host_compiler_prefix + "/ld"), - tool_path(name = "cpp", path = ctx.attr.host_compiler_prefix + "/cpp"), - tool_path(name = "dwp", path = ctx.attr.host_compiler_prefix + "/dwp"), - tool_path(name = "gcov", path = ctx.attr.host_compiler_prefix + "/gcov"), - tool_path(name = "ld", path = ctx.attr.host_compiler_prefix + "/ld"), - tool_path(name = "nm", path = ctx.attr.host_compiler_prefix + "/nm"), - tool_path(name = "objcopy", path = ctx.attr.host_compiler_prefix + "/objcopy"), - tool_path(name = "objdump", path = ctx.attr.host_compiler_prefix + "/objdump"), - tool_path(name = "strip", path = ctx.attr.host_compiler_prefix + "/strip"), - ] - elif (ctx.attr.cpu == "darwin"): - tool_paths = [ - tool_path(name = "gcc", path = ctx.attr.host_compiler_path), - tool_path(name = "ar", path = ctx.attr.host_compiler_prefix + "/libtool"), - tool_path(name = "compat-ld", path = ctx.attr.host_compiler_prefix + "/ld"), - tool_path(name = "cpp", path = ctx.attr.host_compiler_prefix + "/cpp"), - tool_path(name = "dwp", path = ctx.attr.host_compiler_prefix + "/dwp"), - tool_path(name = "gcov", path = ctx.attr.host_compiler_prefix + "/gcov"), - tool_path(name = "ld", path = ctx.attr.host_compiler_prefix + "/ld"), - tool_path(name = "nm", path = ctx.attr.host_compiler_prefix + "/nm"), - tool_path(name = "objcopy", path = ctx.attr.host_compiler_prefix + "/objcopy"), - tool_path(name = "objdump", path = ctx.attr.host_compiler_prefix + "/objdump"), - tool_path(name = "strip", path = ctx.attr.host_compiler_prefix + "/strip"), - ] - else: - fail("Unreachable") - - out = ctx.actions.declare_file(ctx.label.name) - ctx.actions.write(out, "Fake executable") - return [ - cc_common.create_cc_toolchain_config_info( - ctx = ctx, - features = features, - action_configs = action_configs, - artifact_name_patterns = [], - cxx_builtin_include_directories = cxx_builtin_include_directories, - toolchain_identifier = toolchain_identifier, - host_system_name = host_system_name, - target_system_name = target_system_name, - target_cpu = target_cpu, - target_libc = target_libc, - compiler = compiler, - abi_version = abi_version, - abi_libc_version = abi_libc_version, - tool_paths = tool_paths, - make_variables = [], - builtin_sysroot = builtin_sysroot, - cc_target_os = cc_target_os, - ), - DefaultInfo( - executable = out, - ), - ] - -cc_toolchain_config = rule( - attrs = { - "cpu": attr.string( - mandatory = True, - values = [ - "darwin", - "local", - "x64_windows", - ], - ), - "builtin_include_directories": attr.string_list(), - "extra_no_canonical_prefixes_flags": attr.string_list(), - 
"host_compiler_path": attr.string(), - "host_compiler_prefix": attr.string(), - "host_compiler_warnings": attr.string_list(), - "host_unfiltered_compile_flags": attr.string_list(), - "linker_bin_path": attr.string(), - "msvc_cl_path": attr.string(default = "msvc_not_used"), - "msvc_env_include": attr.string(default = "msvc_not_used"), - "msvc_env_lib": attr.string(default = "msvc_not_used"), - "msvc_env_path": attr.string(default = "msvc_not_used"), - "msvc_env_tmp": attr.string(default = "msvc_not_used"), - "msvc_lib_path": attr.string(default = "msvc_not_used"), - "msvc_link_path": attr.string(default = "msvc_not_used"), - "msvc_ml_path": attr.string(default = "msvc_not_used"), - }, - executable = True, - provides = [CcToolchainConfigInfo], - implementation = _impl, -) diff --git a/third_party/toolchains/gcc7_manylinux2010-nvcc-cuda11/clang/bin/crosstool_wrapper_driver_is_not_gcc b/third_party/toolchains/gcc7_manylinux2010-nvcc-cuda11/clang/bin/crosstool_wrapper_driver_is_not_gcc deleted file mode 100755 index 01b454807..000000000 --- a/third_party/toolchains/gcc7_manylinux2010-nvcc-cuda11/clang/bin/crosstool_wrapper_driver_is_not_gcc +++ /dev/null @@ -1,281 +0,0 @@ -#!/usr/bin/env python -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Crosstool wrapper for compiling CUDA programs. - -SYNOPSIS: - crosstool_wrapper_is_not_gcc [options passed in by cc_library() - or cc_binary() rule] - -DESCRIPTION: - This script is expected to be called by the cc_library() or cc_binary() bazel - rules. When the option "-x cuda" is present in the list of arguments passed - to this script, it invokes the nvcc CUDA compiler. Most arguments are passed - as is as a string to --compiler-options of nvcc. When "-x cuda" is not - present, this wrapper invokes hybrid_driver_is_not_gcc with the input - arguments as is. -""" - -from __future__ import print_function - -from argparse import ArgumentParser -import os -import subprocess -import re -import sys -import pipes - -# Template values set by cuda_autoconf. -CPU_COMPILER = ('/dt7/usr/bin/gcc') -GCC_HOST_COMPILER_PATH = ('/dt7/usr/bin/gcc') - -NVCC_PATH = '/usr/local/cuda-11.2/bin/nvcc' -PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH) -NVCC_VERSION = '11.2' - -def Log(s): - print('gpus/crosstool: {0}'.format(s)) - - -def GetOptionValue(argv, option): - """Extract the list of values for option from the argv list. - - Args: - argv: A list of strings, possibly the argv passed to main(). - option: The option whose value to extract, with the leading '-'. - - Returns: - A list of values, either directly following the option, - (eg., -opt val1 val2) or values collected from multiple occurrences of - the option (eg., -opt val1 -opt val2). 
- """ - - parser = ArgumentParser() - parser.add_argument(option, nargs='*', action='append') - option = option.lstrip('-').replace('-', '_') - args, _ = parser.parse_known_args(argv) - if not args or not vars(args)[option]: - return [] - else: - return sum(vars(args)[option], []) - - -def GetHostCompilerOptions(argv): - """Collect the -isystem, -iquote, and --sysroot option values from argv. - - Args: - argv: A list of strings, possibly the argv passed to main(). - - Returns: - The string that can be used as the --compiler-options to nvcc. - """ - - parser = ArgumentParser() - parser.add_argument('-isystem', nargs='*', action='append') - parser.add_argument('-iquote', nargs='*', action='append') - parser.add_argument('--sysroot', nargs=1) - parser.add_argument('-g', nargs='*', action='append') - parser.add_argument('-fno-canonical-system-headers', action='store_true') - parser.add_argument('-no-canonical-prefixes', action='store_true') - - args, _ = parser.parse_known_args(argv) - - opts = '' - - if args.isystem: - opts += ' -isystem ' + ' -isystem '.join(sum(args.isystem, [])) - if args.iquote: - opts += ' -iquote ' + ' -iquote '.join(sum(args.iquote, [])) - if args.g: - opts += ' -g' + ' -g'.join(sum(args.g, [])) - if args.fno_canonical_system_headers: - opts += ' -fno-canonical-system-headers' - if args.no_canonical_prefixes: - opts += ' -no-canonical-prefixes' - if args.sysroot: - opts += ' --sysroot ' + args.sysroot[0] - - return opts - -def _update_options(nvcc_options): - if NVCC_VERSION in ("7.0",): - return nvcc_options - - update_options = { "relaxed-constexpr" : "expt-relaxed-constexpr" } - return [ update_options[opt] if opt in update_options else opt - for opt in nvcc_options ] - -def GetNvccOptions(argv): - """Collect the -nvcc_options values from argv. - - Args: - argv: A list of strings, possibly the argv passed to main(). - - Returns: - The string that can be passed directly to nvcc. - """ - - parser = ArgumentParser() - parser.add_argument('-nvcc_options', nargs='*', action='append') - - args, _ = parser.parse_known_args(argv) - - if args.nvcc_options: - options = _update_options(sum(args.nvcc_options, [])) - return ' '.join(['--'+a for a in options]) - return '' - -def system(cmd): - """Invokes cmd with os.system(). - - Args: - cmd: The command. - - Returns: - The exit code if the process exited with exit() or -signal - if the process was terminated by a signal. - """ - retv = os.system(cmd) - if os.WIFEXITED(retv): - return os.WEXITSTATUS(retv) - else: - return -os.WTERMSIG(retv) - -def InvokeNvcc(argv, log=False): - """Call nvcc with arguments assembled from argv. - - Args: - argv: A list of strings, possibly the argv passed to main(). - log: True if logging is requested. - - Returns: - The return value of calling system('nvcc ' + args) - """ - - host_compiler_options = GetHostCompilerOptions(argv) - nvcc_compiler_options = GetNvccOptions(argv) - opt_option = GetOptionValue(argv, '-O') - m_options = GetOptionValue(argv, '-m') - m_options = ''.join([' -m' + m for m in m_options if m in ['32', '64']]) - include_options = GetOptionValue(argv, '-I') - out_file = GetOptionValue(argv, '-o') - depfiles = GetOptionValue(argv, '-MF') - defines = GetOptionValue(argv, '-D') - defines = ''.join([' -D' + define for define in defines]) - undefines = GetOptionValue(argv, '-U') - undefines = ''.join([' -U' + define for define in undefines]) - std_options = GetOptionValue(argv, '-std') - # Supported -std flags as of CUDA 9.0. Only keep last to mimic gcc/clang. 
- nvcc_allowed_std_options = ["c++03", "c++11", "c++14"] - std_options = ''.join([' -std=' + define - for define in std_options if define in nvcc_allowed_std_options][-1:]) - fatbin_options = ''.join([' --fatbin-options=' + option - for option in GetOptionValue(argv, '-Xcuda-fatbinary')]) - - # The list of source files get passed after the -c option. I don't know of - # any other reliable way to just get the list of source files to be compiled. - src_files = GetOptionValue(argv, '-c') - - # Pass -w through from host to nvcc, but don't do anything fancier with - # warnings-related flags, since they're not necessarily the same across - # compilers. - warning_options = ' -w' if '-w' in argv else '' - - if len(src_files) == 0: - return 1 - if len(out_file) != 1: - return 1 - - opt = (' -O2' if (len(opt_option) > 0 and int(opt_option[0]) > 0) - else ' -g') - - includes = (' -I ' + ' -I '.join(include_options) - if len(include_options) > 0 - else '') - - # Unfortunately, there are other options that have -c prefix too. - # So allowing only those look like C/C++ files. - src_files = [f for f in src_files if - re.search('\.cpp$|\.cc$|\.c$|\.cxx$|\.C$', f)] - srcs = ' '.join(src_files) - out = ' -o ' + out_file[0] - - nvccopts = '-D_FORCE_INLINES ' - for capability in GetOptionValue(argv, "--cuda-gpu-arch"): - capability = capability[len('sm_'):] - nvccopts += r'-gencode=arch=compute_%s,\"code=sm_%s\" ' % (capability, - capability) - for capability in GetOptionValue(argv, '--cuda-include-ptx'): - capability = capability[len('sm_'):] - nvccopts += r'-gencode=arch=compute_%s,\"code=compute_%s\" ' % (capability, - capability) - nvccopts += nvcc_compiler_options - nvccopts += undefines - nvccopts += defines - nvccopts += std_options - nvccopts += m_options - nvccopts += warning_options - nvccopts += fatbin_options - - if depfiles: - # Generate the dependency file - depfile = depfiles[0] - cmd = (NVCC_PATH + ' ' + nvccopts + - ' --compiler-options "' + host_compiler_options + '"' + - ' --compiler-bindir=' + GCC_HOST_COMPILER_PATH + - ' -I .' + - ' -x cu ' + opt + includes + ' ' + srcs + ' -M -o ' + depfile) - if log: Log(cmd) - exit_status = system(cmd) - if exit_status != 0: - return exit_status - - cmd = (NVCC_PATH + ' ' + nvccopts + - ' --compiler-options "' + host_compiler_options + ' -fPIC"' + - ' --compiler-bindir=' + GCC_HOST_COMPILER_PATH + - ' -I .' + - ' -x cu ' + opt + includes + ' -c ' + srcs + out) - - # TODO(zhengxq): for some reason, 'gcc' needs this help to find 'as'. - # Need to investigate and fix. - cmd = 'PATH=' + PREFIX_DIR + ':$PATH ' + cmd - if log: Log(cmd) - return system(cmd) - - -def main(): - parser = ArgumentParser() - parser.add_argument('-x', nargs=1) - parser.add_argument('--cuda_log', action='store_true') - args, leftover = parser.parse_known_args(sys.argv[1:]) - - if args.x and args.x[0] == 'cuda': - if args.cuda_log: Log('-x cuda') - leftover = [pipes.quote(s) for s in leftover] - if args.cuda_log: Log('using nvcc') - return InvokeNvcc(leftover, log=args.cuda_log) - - # Strip our flags before passing through to the CPU compiler for files which - # are not -x cuda. We can't just pass 'leftover' because it also strips -x. - # We not only want to pass -x to the CPU compiler, but also keep it in its - # relative location in the argv list (the compiler is actually sensitive to - # this). 
- cpu_compiler_flags = [flag for flag in sys.argv[1:] - if not flag.startswith(('--cuda_log'))] - - return subprocess.call([CPU_COMPILER] + cpu_compiler_flags) - -if __name__ == '__main__': - sys.exit(main())
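
The flag handling throughout this deleted wrapper rests on a single argparse trick, used by GetOptionValue and GetHostCompilerOptions above: declare only the option of interest, let parse_known_args() skip every other compiler flag, then flatten repeated occurrences. A standalone sketch of that pattern, not taken from the deleted file (the helper name and sample argv are illustrative):

    #!/usr/bin/env python
    """Illustration of the parse_known_args pattern from the deleted wrapper."""
    from argparse import ArgumentParser

    def get_option_value(argv, option):
        # Declare only the option we care about; parse_known_args() silently
        # ignores every other flag instead of raising an error on it.
        parser = ArgumentParser()
        parser.add_argument(option, nargs='*', action='append')
        dest = option.lstrip('-').replace('-', '_')
        args, _unknown = parser.parse_known_args(argv)
        values = vars(args)[dest]
        # Repeated occurrences arrive as [['a'], ['b']]; flatten to ['a', 'b'].
        return sum(values, []) if values else []

    if __name__ == '__main__':
        argv = ['-I', 'include', '-O2', '-I', 'third_party', '-c', 'kernel.cu']
        print(get_option_value(argv, '-I'))  # ['include', 'third_party']
        print(get_option_value(argv, '-O'))  # ['2']

Attached values such as -O2 parse the same way as space-separated ones, which is why one helper suffices for optimization levels, -I include paths, -D defines, and -std options alike.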