From 6180964a01f67ed322da4ba2b2a3e6cc6c2cc760 Mon Sep 17 00:00:00 2001
From: Mats Petersson <mats.petersson@arm.com>
Date: Thu, 5 Oct 2023 11:06:00 +0100
Subject: [PATCH] [flang]Pass to add vscale range attribute (#68103)

Add vscale range attirbute for the Scalable Vector Extension (SVE) if
provided on the command-line (options in a previous commit)

If no command-line option is provided, if the target-feature of SVE is
specified and the architecture is AArch64, it defualts to 128-2048. in
other words a vscale-min of 1, vscale-max of 16.

A pass is used to add the atribute to all functions. The vectorizer will
use this attribute to generate the SVE instruction to match the range
specified. The attribute is harmless if there is no vectorizable
operations in the function.
---
 .../flang/Optimizer/Transforms/Passes.h       |  4 +
 .../flang/Optimizer/Transforms/Passes.td      | 14 +++
 flang/include/flang/Tools/CLOptions.inc       |  4 +
 flang/include/flang/Tools/CrossToolHelpers.h  |  2 +
 flang/lib/Frontend/FrontendActions.cpp        | 28 ++++++
 flang/lib/Optimizer/Transforms/CMakeLists.txt |  1 +
 flang/lib/Optimizer/Transforms/VScaleAttr.cpp | 90 +++++++++++++++++++
 .../Arm/arm-sve-vector-bits-vscale-range.f90  | 23 +++++
 8 files changed, 166 insertions(+)
 create mode 100644 flang/lib/Optimizer/Transforms/VScaleAttr.cpp
 create mode 100644 flang/test/Lower/Arm/arm-sve-vector-bits-vscale-range.f90

diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 8aeb3e373298e8..64882c8ec406b0 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -80,6 +80,10 @@ std::unique_ptr<mlir::Pass> createOMPFunctionFilteringPass();
 std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
 createOMPMarkDeclareTargetPass();
 
+std::unique_ptr<mlir::Pass> createVScaleAttrPass();
+std::unique_ptr<mlir::Pass>
+createVScaleAttrPass(std::pair<unsigned, unsigned> vscaleAttr);
+
 // declarative passes
 #define GEN_PASS_REGISTRATION
 #include "flang/Optimizer/Transforms/Passes.h.inc"
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 9474edf13ce463..80da485392007f 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -326,4 +326,18 @@ def OMPFunctionFiltering : Pass<"omp-function-filtering"> {
   ];
 }
 
+def VScaleAttr : Pass<"vscale-attr", "mlir::func::FuncOp"> {
+  let summary = "Add vscale_range attribute to functions";
+  let description = [{
+     Set an attribute for the vscale range on functions, to allow scalable
+     vector operations to be used on processors with variable vector length.
+  }];
+  let options = [
+    Option<"vscaleRange", "vscale-range",
+           "std::pair<unsigned, unsigned>", /*default=*/"std::pair<unsigned, unsigned>{}",
+           "vector scale range">,
+  ];
+  let constructor = "::fir::createVScaleAttrPass()";
+}
+
 #endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index 0b210bec9f785c..76d18b73aee20a 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -299,6 +299,10 @@ inline void createDefaultFIRCodeGenPassPipeline(
   fir::addTargetRewritePass(pm);
   fir::addExternalNameConversionPass(pm, config.Underscoring);
   fir::createDebugPasses(pm, config.DebugInfo);
+
+  if (config.VScaleMin != 0)
+    pm.addPass(fir::createVScaleAttrPass({config.VScaleMin, config.VScaleMax}));
+
   fir::addFIRToLLVMPass(pm, config.OptLevel);
 }
 
diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h
index 8453828995ad01..6245a2f1376fcc 100644
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@@ -42,6 +42,8 @@ struct MLIRToLLVMPassPipelineConfig {
   bool LoopVersioning = false; ///< Run the version loop pass.
   llvm::codegenoptions::DebugInfoKind DebugInfo =
       llvm::codegenoptions::NoDebugInfo; ///< Debug info generation.
+  unsigned VScaleMin = 0; ///< SVE vector range minimum.
+  unsigned VScaleMax = 0; ///< SVE vector range maximum.
 };
 
 struct OffloadModuleOpts {
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index fa12e37607cf11..73c00c8679c7ec 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -695,6 +695,22 @@ void CodeGenAction::lowerHLFIRToFIR() {
   }
 }
 
+// TODO: We should get this from TargetInfo. However, that depends on
+// too much of clang, so for now, replicate the functionality.
+static std::optional<std::pair<unsigned, unsigned>>
+getVScaleRange(CompilerInstance &ci,
+               const Fortran::frontend::LangOptions &langOpts) {
+  if (langOpts.VScaleMin || langOpts.VScaleMax)
+    return std::pair<unsigned, unsigned>(
+        langOpts.VScaleMin ? langOpts.VScaleMin : 1, langOpts.VScaleMax);
+
+  std::string featuresStr = getTargetFeatures(ci);
+  if (featuresStr.find("+sve") != std::string::npos)
+    return std::pair<unsigned, unsigned>(1, 16);
+
+  return std::nullopt;
+}
+
 // Lower the previously generated MLIR module into an LLVM IR module
 void CodeGenAction::generateLLVMIR() {
   assert(mlirModule && "The MLIR module has not been generated yet.");
@@ -715,6 +731,18 @@ void CodeGenAction::generateLLVMIR() {
 
   MLIRToLLVMPassPipelineConfig config(level, opts);
 
+  const auto targetOpts = ci.getInvocation().getTargetOpts();
+  const llvm::Triple triple(targetOpts.triple);
+
+  // Only get the vscale range if AArch64.
+  if (triple.isAArch64()) {
+    auto langOpts = ci.getInvocation().getLangOpts();
+    if (auto vsr = getVScaleRange(ci, langOpts)) {
+      config.VScaleMin = vsr->first;
+      config.VScaleMax = vsr->second;
+    }
+  }
+
   // Create the pass pipeline
   fir::createMLIRToLLVMPassPipeline(pm, config);
   (void)mlir::applyPassManagerCLOptions(pm);
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index 3d2b7e5eaeade0..428c4c2a1e6440 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -19,6 +19,7 @@ add_flang_library(FIRTransforms
   OMPEarlyOutlining.cpp
   OMPFunctionFiltering.cpp
   OMPMarkDeclareTarget.cpp
+  VScaleAttr.cpp
 
   DEPENDS
   FIRDialect
diff --git a/flang/lib/Optimizer/Transforms/VScaleAttr.cpp b/flang/lib/Optimizer/Transforms/VScaleAttr.cpp
new file mode 100644
index 00000000000000..601a937de37be9
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/VScaleAttr.cpp
@@ -0,0 +1,90 @@
+//===- VScaleAttr.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+/// \file
+/// This pass adds a `vscale_range` attribute to function definitions.
+/// The attribute is used for scalable vector operations on Arm processors
+/// and should only be run on processors that support this feature. [It is
+/// likely harmless to run it on something else, but it is also not valuable].
+//===----------------------------------------------------------------------===//
+
+#include "flang/ISO_Fortran_binding_wrapper.h"
+#include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/Runtime/Inquiry.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Dialect/Support/FIRContext.h"
+#include "flang/Optimizer/Dialect/Support/KindMapping.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/IR/TypeUtilities.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "mlir/Transforms/RegionUtils.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+
+namespace fir {
+#define GEN_PASS_DECL_VSCALEATTR
+#define GEN_PASS_DEF_VSCALEATTR
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+#define DEBUG_TYPE "vscale-attr"
+
+namespace {
+
+class VScaleAttrPass : public fir::impl::VScaleAttrBase<VScaleAttrPass> {
+public:
+  VScaleAttrPass(const fir::VScaleAttrOptions &options) {
+    vscaleRange = options.vscaleRange;
+  }
+  VScaleAttrPass() {}
+  void runOnOperation() override;
+};
+
+} // namespace
+
+void VScaleAttrPass::runOnOperation() {
+  LLVM_DEBUG(llvm::dbgs() << "=== Begin " DEBUG_TYPE " ===\n");
+  mlir::func::FuncOp func = getOperation();
+
+  LLVM_DEBUG(llvm::dbgs() << "Func-name:" << func.getSymName() << "\n");
+
+  auto context = &getContext();
+
+  auto intTy = mlir::IntegerType::get(context, 32);
+
+  assert(vscaleRange.first && "VScaleRange minimum should be non-zero");
+
+  func->setAttr("vscale_range",
+                mlir::LLVM::VScaleRangeAttr::get(
+                    context, mlir::IntegerAttr::get(intTy, vscaleRange.first),
+                    mlir::IntegerAttr::get(intTy, vscaleRange.second)));
+
+  LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n");
+}
+
+std::unique_ptr<mlir::Pass>
+fir::createVScaleAttrPass(std::pair<unsigned, unsigned> vscaleAttr) {
+  VScaleAttrOptions opts;
+  opts.vscaleRange = vscaleAttr;
+  return std::make_unique<VScaleAttrPass>(opts);
+}
+
+std::unique_ptr<mlir::Pass> fir::createVScaleAttrPass() {
+  return std::make_unique<VScaleAttrPass>();
+}
diff --git a/flang/test/Lower/Arm/arm-sve-vector-bits-vscale-range.f90 b/flang/test/Lower/Arm/arm-sve-vector-bits-vscale-range.f90
new file mode 100644
index 00000000000000..1c413477826e5b
--- /dev/null
+++ b/flang/test/Lower/Arm/arm-sve-vector-bits-vscale-range.f90
@@ -0,0 +1,23 @@
+! RUN: %flang_fc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=1 -mvscale-max=1  -emit-llvm -o - %s | FileCheck %s -D#VBITS=1
+! RUN: %flang_fc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=2 -mvscale-max=2  -emit-llvm -o - %s | FileCheck %s -D#VBITS=2
+! RUN: %flang_fc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4  -emit-llvm -o - %s | FileCheck %s -D#VBITS=4
+! RUN: %flang_fc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=8 -mvscale-max=8  -emit-llvm -o - %s | FileCheck %s -D#VBITS=8
+! RUN: %flang_fc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=16 -mvscale-max=16  -emit-llvm -o - %s | FileCheck %s -D#VBITS=16
+! RUN: %flang_fc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -mvscale-min=1 -mvscale-max=1  -emit-llvm -o - %s | FileCheck %s -D#VBITS=1
+! RUN: %flang_fc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -mvscale-min=2 -mvscale-max=2  -emit-llvm -o - %s | FileCheck %s -D#VBITS=2
+! RUN: %flang_fc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=1  -emit-llvm -o - %s | FileCheck %s -D#VBITS=1 --check-prefix=CHECK-NOMAX
+! RUN: %flang_fc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=2  -emit-llvm -o - %s | FileCheck %s -D#VBITS=2 --check-prefix=CHECK-NOMAX
+! RUN: %flang_fc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4  -emit-llvm -o - %s | FileCheck %s -D#VBITS=4 --check-prefix=CHECK-NOMAX
+! RUN: %flang_fc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=8  -emit-llvm -o - %s | FileCheck %s -D#VBITS=8 --check-prefix=CHECK-NOMAX
+! RUN: %flang_fc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=16  -emit-llvm -o - %s | FileCheck %s -D#VBITS=16 --check-prefix=CHECK-NOMAX
+! RUN: %flang_fc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -mvscale-min=1 -mvscale-max=0  -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-UNBOUNDED
+! RUN: %flang_fc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=1 -mvscale-max=0  -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-UNBOUNDED
+! RUN: %flang_fc1 -triple aarch64-none-linux-gnu -target-feature +sve -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
+
+! CHECK-LABEL: @func_() #0
+! CHECK: attributes #0 = {{{.*}} vscale_range([[#VBITS]],[[#VBITS]]) {{.*}}}
+! CHECK-NOMAX: attributes #0 = {{{.*}} vscale_range([[#VBITS]],0) {{.*}}}
+! CHECK-UNBOUNDED: attributes #0 = {{{.*}} vscale_range(1,0) {{.*}}}
+! CHECK-NONE: attributes #0 = {{{.*}} vscale_range(1,16) {{.*}}}
+subroutine func
+end subroutine func