From b5b9092c5c982b8a9495d17a2301c15c9ae11f7e Mon Sep 17 00:00:00 2001
From: Jamie Cunliffe <Jamie.Cunliffe@arm.com>
Date: Wed, 5 May 2021 14:39:09 +0100
Subject: [PATCH 1/4] Intrinsic test tool to compare neon intrinsics with C

This tool generates rust and C programs that will call intrinsics and
print the results with multiple different inputs. It will build all
the programs and then run each of them and diff the output printing
any differences that are found.

It uses the tracking spreadsheet (with the % column renamed to
enabled) to determine which intrinsics to test.

It will filter out any intrinsics that have an argument with the name
"n" or "lane" as those have constraints on them as to what numbers can
be used.
---
 .gitignore                             |   2 +
 Cargo.toml                             |   1 +
 crates/intrinsic-test/Cargo.toml       |  16 +
 crates/intrinsic-test/README.md        |  23 ++
 crates/intrinsic-test/src/argument.rs  | 137 +++++++
 crates/intrinsic-test/src/intrinsic.rs | 112 ++++++
 crates/intrinsic-test/src/main.rs      | 380 +++++++++++++++++++
 crates/intrinsic-test/src/types.rs     | 483 +++++++++++++++++++++++++
 crates/intrinsic-test/src/values.rs    | 126 +++++++
 9 files changed, 1280 insertions(+)
 create mode 100644 crates/intrinsic-test/Cargo.toml
 create mode 100644 crates/intrinsic-test/README.md
 create mode 100644 crates/intrinsic-test/src/argument.rs
 create mode 100644 crates/intrinsic-test/src/intrinsic.rs
 create mode 100644 crates/intrinsic-test/src/main.rs
 create mode 100644 crates/intrinsic-test/src/types.rs
 create mode 100644 crates/intrinsic-test/src/values.rs
diff --git a/.gitignore b/.gitignore
index 97647e1e70..3a1fbc6423 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,5 @@ target
 tags
 crates/stdarch-gen/aarch64.rs
 crates/stdarch-gen/arm.rs
+c_programs/*
+rust_programs/*
\ No newline at end of file
diff --git a/Cargo.toml b/Cargo.toml
index 73f69ca46f..6efd6b189a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,6 +4,7 @@ members = [
   "crates/core_arch",
   "crates/std_detect",
   "crates/stdarch-gen",
+  "crates/intrinsic-test",
   "examples/"
 ]
 exclude = [
diff --git a/crates/intrinsic-test/Cargo.toml b/crates/intrinsic-test/Cargo.toml
new file mode 100644
index 0000000000..4cdf811e5c
--- /dev/null
+++ b/crates/intrinsic-test/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "intrinsic-test"
+version = "0.1.0"
+authors = ["Jamie Cunliffe <Jamie.Cunliffe@arm.com>"]
+edition = "2018"
+
+[dependencies]
+lazy_static = "1.4.0"
+serde = { version = "1", features = ["derive"] }
+csv = "1.1"
+clap = "2.33.3"
+regex = "1.4.2"
+log = "0.4.11"
+pretty_env_logger = "0.4.0"
+rayon = "1.5.0"
+diff = "0.1.12"
\ No newline at end of file
diff --git a/crates/intrinsic-test/README.md b/crates/intrinsic-test/README.md
new file mode 100644
index 0000000000..0f60aaa040
--- /dev/null
+++ b/crates/intrinsic-test/README.md
@@ -0,0 +1,23 @@
+Generate and run programs using equivalent C and Rust intrinsics, checking that
+each produces the same result from random inputs.
+
+# Usage
+```
+USAGE:
+    intrinsic-test [OPTIONS] <INPUT>
+
+FLAGS:
+    -h, --help       Prints help information
+    -V, --version    Prints version information
+
+OPTIONS:
+        --cppcompiler <CPPCOMPILER>    The C++ compiler to use for compiling the c++ code [default: clang++]
+        --toolchain <TOOLCHAIN>        The rust toolchain to use for building the rust code [default: nightly]
+
+ARGS:
+    <INPUT>    The input file containing the intrinsics
+```
+
+The intrinsic.csv is the arm neon tracking google sheet (https://docs.google.com/spreadsheets/d/1MqW1g8c7tlhdRWQixgdWvR4uJHNZzCYAf4V0oHjZkwA/edit#gid=0)
+that contains the intrinsic list. The done percentage column should be renamed to "enabled".
+
diff --git a/crates/intrinsic-test/src/argument.rs b/crates/intrinsic-test/src/argument.rs
new file mode 100644
index 0000000000..96ed440ee7
--- /dev/null
+++ b/crates/intrinsic-test/src/argument.rs
@@ -0,0 +1,137 @@
+use serde::{Deserialize, Deserializer};
+
+use crate::types::IntrinsicType;
+use crate::Language;
+
+/// An argument for the intrinsic.
+#[derive(Debug, PartialEq, Clone)]
+pub struct Argument {
+    /// The argument's index in the intrinsic function call.
+    pub pos: usize,
+    /// The argument name.
+    pub name: String,
+    /// The type of the argument.
+    pub ty: IntrinsicType,
+}
+
+impl Argument {
+    /// Creates an argument from a Rust style signature i.e. `name: type`
+    fn from_rust(pos: usize, arg: &str) -> Result<Self, String> {
+        let mut parts = arg.split(':');
+        let name = parts.next().unwrap().trim().to_string();
+        let ty = IntrinsicType::from_rust(parts.next().unwrap().trim())?;
+
+        Ok(Self { pos, name, ty })
+    }
+
+    fn to_c_type(&self) -> String {
+        self.ty.c_type()
+    }
+
+    fn is_simd(&self) -> bool {
+        self.ty.is_simd()
+    }
+
+    pub fn is_ptr(&self) -> bool {
+        self.ty.is_ptr()
+    }
+}
+
+#[derive(Debug, PartialEq, Clone)]
+pub struct ArgumentList {
+    pub args: Vec<Argument>,
+}
+
+impl ArgumentList {
+    /// Creates an argument list from a Rust function signature, the data for
+    /// this function should only be the arguments.
+    /// e.g. for `fn test(a: u32, b: u32) -> u32` data should just be `a: u32, b: u32`
+    fn from_rust_arguments(data: &str) -> Result<Self, String> {
+        let args = data
+            .split(',')
+            .enumerate()
+            .map(|(idx, arg)| Argument::from_rust(idx, arg))
+            .collect::<Result<_, _>>()?;
+
+        Ok(Self { args })
+    }
+
+    /// Converts the argument list into the call paramters for a C function call.
+    /// e.g. this would generate something like `a, &b, c`
+    pub fn as_call_param_c(&self) -> String {
+        self.args
+            .iter()
+            .map(|arg| match arg.ty {
+                IntrinsicType::Ptr { .. } => {
+                    format!("&{}", arg.name)
+                }
+                IntrinsicType::Type { .. } => arg.name.clone(),
+            })
+            .collect::<Vec<String>>()
+            .join(", ")
+    }
+
+    /// Converts the argument list into the call paramters for a Rust function.
+    /// e.g. this would generate something like `a, b, c`
+    pub fn as_call_param_rust(&self) -> String {
+        self.args
+            .iter()
+            .map(|arg| arg.name.clone())
+            .collect::<Vec<String>>()
+            .join(", ")
+    }
+
+    /// Creates a line that initializes this argument for C code.
+    /// e.g. `int32x2_t a = { 0x1, 0x2 };`
+    pub fn init_random_values_c(&self, pass: usize) -> String {
+        self.iter()
+            .map(|arg| {
+                format!(
+                    "{ty} {name} = {{ {values} }};",
+                    ty = arg.to_c_type(),
+                    name = arg.name,
+                    values = arg.ty.populate_random(pass, &Language::C)
+                )
+            })
+            .collect::<Vec<_>>()
+            .join("\n    ")
+    }
+
+    /// Creates a line that initializes this argument for Rust code.
+    /// e.g. `let a = transmute([0x1, 0x2]);`
+    pub fn init_random_values_rust(&self, pass: usize) -> String {
+        self.iter()
+            .map(|arg| {
+                if arg.is_simd() {
+                    format!(
+                        "let {name} = ::std::mem::transmute([{values}]);",
+                        name = arg.name,
+                        values = arg.ty.populate_random(pass, &Language::Rust),
+                    )
+                } else {
+                    format!(
+                        "let {name} = {value};",
+                        name = arg.name,
+                        value = arg.ty.populate_random(pass, &Language::Rust)
+                    )
+                }
+            })
+            .collect::<Vec<_>>()
+            .join("\n        ")
+    }
+
+    pub fn iter(&self) -> std::slice::Iter<'_, Argument> {
+        self.args.iter()
+    }
+}
+
+impl<'de> Deserialize<'de> for ArgumentList {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        use serde::de::Error;
+        let s = String::deserialize(deserializer)?;
+        Self::from_rust_arguments(&s).map_err(Error::custom)
+    }
+}
diff --git a/crates/intrinsic-test/src/intrinsic.rs b/crates/intrinsic-test/src/intrinsic.rs
new file mode 100644
index 0000000000..499cf7612c
--- /dev/null
+++ b/crates/intrinsic-test/src/intrinsic.rs
@@ -0,0 +1,112 @@
+use crate::types::{IntrinsicType, TypeKind};
+
+use super::argument::ArgumentList;
+use serde::de::Unexpected;
+use serde::{de, Deserialize, Deserializer};
+
+/// An intrinsic
+#[derive(Deserialize, Debug, PartialEq, Clone)]
+pub struct Intrinsic {
+    /// If the intrinsic should be tested.
+    #[serde(deserialize_with = "bool_from_string")]
+    pub enabled: bool,
+
+    /// The function name of this intrinsic.
+    pub name: String,
+
+    /// Any arguments for this intrinsinc.
+    #[serde(rename = "args")]
+    pub arguments: ArgumentList,
+
+    /// The return type of this intrinsic.
+    #[serde(rename = "return")]
+    pub results: IntrinsicType,
+}
+
+impl Intrinsic {
+    /// Generates a std::cout for the intrinsics results that will match the
+    /// rust debug output format for the return type.
+    pub fn print_result_c(&self, index: usize) -> String {
+        let lanes = if self.results.num_lanes() > 1 {
+            (0..self.results.num_lanes())
+                .map(|idx| -> std::string::String {
+                    format!(
+                        "{cast}{lane_fn}(__return_value, {lane})",
+                        cast = self.results.c_promotion(),
+                        lane_fn = self.results.get_lane_function(),
+                        lane = idx
+                    )
+                })
+                .collect::<Vec<_>>()
+                .join(r#" << ", " << "#)
+        } else {
+            format!(
+                "{promote}cast<{cast}>(__return_value)",
+                cast = match self.results.kind() {
+                    TypeKind::Float if self.results.inner_size() == 32 => "float".to_string(),
+                    TypeKind::Float if self.results.inner_size() == 64 => "double".to_string(),
+                    TypeKind::Int => format!("int{}_t", self.results.inner_size()),
+                    TypeKind::UInt => format!("uint{}_t", self.results.inner_size()),
+                    TypeKind::Poly => format!("poly{}_t", self.results.inner_size()),
+                    ty => todo!("print_result_c - Unknown type: {:#?}", ty),
+                },
+                promote = self.results.c_promotion(),
+            )
+        };
+
+        format!(
+            r#"std::cout << "Result {idx}: {ty}" << std::fixed << std::setprecision(150) <<  {lanes} << "{close}" << std::endl;"#,
+            ty = if self.results.is_simd() {
+                format!("{}(", self.results.c_type())
+            } else {
+                String::from("")
+            },
+            close = if self.results.is_simd() { ")" } else { "" },
+            lanes = lanes,
+            idx = index,
+        )
+    }
+
+    pub fn generate_pass_rust(&self, index: usize) -> String {
+        format!(
+            r#"
+    unsafe {{
+        {initialized_args}
+        let res = {intrinsic_call}({args});
+        println!("Result {idx}: {{:.150?}}", res);
+    }}"#,
+            initialized_args = self.arguments.init_random_values_rust(index),
+            intrinsic_call = self.name,
+            args = self.arguments.as_call_param_rust(),
+            idx = index,
+        )
+    }
+
+    pub fn generate_pass_c(&self, index: usize) -> String {
+        format!(
+            r#"  {{
+    {initialized_args}
+    auto __return_value = {intrinsic_call}({args});
+    {print_result}
+  }}"#,
+            initialized_args = self.arguments.init_random_values_c(index),
+            intrinsic_call = self.name,
+            args = self.arguments.as_call_param_c(),
+            print_result = self.print_result_c(index)
+        )
+    }
+}
+
+fn bool_from_string<'de, D>(deserializer: D) -> Result<bool, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    match String::deserialize(deserializer)?.to_uppercase().as_ref() {
+        "TRUE" => Ok(true),
+        "FALSE" => Ok(false),
+        other => Err(de::Error::invalid_value(
+            Unexpected::Str(other),
+            &"TRUE or FALSE",
+        )),
+    }
+}
diff --git a/crates/intrinsic-test/src/main.rs b/crates/intrinsic-test/src/main.rs
new file mode 100644
index 0000000000..22e1be5e4f
--- /dev/null
+++ b/crates/intrinsic-test/src/main.rs
@@ -0,0 +1,380 @@
+#[macro_use]
+extern crate lazy_static;
+#[macro_use]
+extern crate log;
+
+use std::fs::File;
+use std::io::Write;
+use std::process::Command;
+
+use clap::{App, Arg};
+use intrinsic::Intrinsic;
+use rayon::prelude::*;
+use types::TypeKind;
+
+mod argument;
+mod intrinsic;
+mod types;
+mod values;
+
+#[derive(Debug, PartialEq)]
+pub enum Language {
+    Rust,
+    C,
+}
+
+fn generate_c_program(header_file: &str, intrinsic: &Intrinsic) -> String {
+    format!(
+        r#"#include <{header_file}>
+#include <iostream>
+#include <cstring>
+#include <iomanip>
+#include <sstream>
+template<typename T1, typename T2> T1 cast(T2 x) {{
+  static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same");
+  T1 ret = 0;
+  memcpy(&ret, &x, sizeof(T1));
+  return ret;
+}}
+std::ostream& operator<<(std::ostream& os, poly128_t value) {{
+  std::stringstream temp;
+  do {{
+    int n = value % 10;
+    value /= 10;
+    temp << n;
+  }} while (value != 0);
+  std::string tempstr(temp.str());
+  std::string res(tempstr.rbegin(), tempstr.rend());
+  os << res;
+  return os;
+}}
+int main(int argc, char **argv) {{
+{passes}
+    return 0;
+}}"#,
+        header_file = header_file,
+        passes = (1..20)
+            .map(|idx| intrinsic.generate_pass_c(idx))
+            .collect::<Vec<_>>()
+            .join("\n"),
+    )
+}
+
+fn generate_rust_program(intrinsic: &Intrinsic) -> String {
+    format!(
+        r#"#![feature(simd_ffi)]
+#![feature(link_llvm_intrinsics)]
+#![feature(stdsimd)]
+#![allow(overflowing_literals)]
+use core::arch::aarch64::*;
+#[allow(unused_macros)]
+macro_rules! bits_to_float(
+    ($t:ty, $bits:expr) => (
+        {{ let x: $t = ::std::mem::transmute($bits); x  }}
+    )
+);
+
+fn main() {{
+{passes}
+}}
+"#,
+        passes = (1..20)
+            .map(|idx| intrinsic.generate_pass_rust(idx))
+            .collect::<Vec<_>>()
+            .join("\n"),
+    )
+}
+
+fn compile_c(c_filename: &str, intrinsic: &Intrinsic, compiler: &str) -> bool {
+    let output = Command::new("sh")
+        .arg("-c")
+        .arg(format!(
+            "{cpp} -Wno-narrowing -O2 -target {target} -o c_programs/{intrinsic} {filename}",
+            target = "aarch64-unknown-linux-gnu",
+            filename = c_filename,
+            intrinsic = intrinsic.name,
+            cpp = compiler,
+        ))
+        .output();
+    if let Ok(output) = output {
+        if output.status.success() {
+            true
+        } else {
+            let stderr = std::str::from_utf8(&output.stderr).unwrap_or("");
+            if stderr.contains("error: use of undeclared identifier") {
+                warn!("Skipping intrinsic due to no support: {}", intrinsic.name);
+                true
+            } else {
+                error!(
+                    "Failed to compile code for intrinsic: {}\n\nstdout:\n{}\n\nstderr:\n{}",
+                    intrinsic.name,
+                    std::str::from_utf8(&output.stdout).unwrap_or(""),
+                    std::str::from_utf8(&output.stderr).unwrap_or("")
+                );
+                false
+            }
+        }
+    } else {
+        error!("Command failed: {:#?}", output);
+        false
+    }
+}
+
+fn build_c(intrinsics: &Vec<Intrinsic>, compiler: &str) -> bool {
+    let _ = std::fs::create_dir("c_programs");
+    intrinsics
+        .par_iter()
+        .map(|i| {
+            let c_filename = format!(r#"c_programs/{}.cpp"#, i.name);
+            let mut file = File::create(&c_filename).unwrap();
+
+            let c_code = generate_c_program("arm_neon.h", &i);
+            file.write_all(c_code.into_bytes().as_slice()).unwrap();
+            compile_c(&c_filename, &i, compiler)
+        })
+        .find_any(|x| !x)
+        .is_none()
+}
+
+fn build_rust(intrinsics: &Vec<Intrinsic>, toolchain: &str) -> bool {
+    intrinsics.iter().for_each(|i| {
+        let rust_dir = format!(r#"rust_programs/{}"#, i.name);
+        let _ = std::fs::create_dir_all(&rust_dir);
+        let rust_filename = format!(r#"{}/main.rs"#, rust_dir);
+        let mut file = File::create(&rust_filename).unwrap();
+
+        let c_code = generate_rust_program(&i);
+        file.write_all(c_code.into_bytes().as_slice()).unwrap();
+    });
+
+    let mut cargo = File::create("rust_programs/Cargo.toml").unwrap();
+    cargo
+        .write_all(
+            format!(
+                r#"[package]
+name = "intrinsic-test"
+version = "{version}"
+authors = ["{authors}"]
+edition = "2018"
+[workspace]
+{binaries}"#,
+                version = env!("CARGO_PKG_VERSION"),
+                authors = env!("CARGO_PKG_AUTHORS"),
+                binaries = intrinsics
+                    .iter()
+                    .map(|i| {
+                        format!(
+                            r#"[[bin]]
+name = "{intrinsic}"
+path = "{intrinsic}/main.rs""#,
+                            intrinsic = i.name
+                        )
+                    })
+                    .collect::<Vec<_>>()
+                    .join("\n")
+            )
+            .into_bytes()
+            .as_slice(),
+        )
+        .unwrap();
+
+    let output = Command::new("sh")
+        .current_dir("rust_programs")
+        .arg("-c")
+        .arg(format!(
+            "cargo +{toolchain} build --release",
+            toolchain = toolchain,
+        ))
+        .output();
+    if let Ok(output) = output {
+        if output.status.success() {
+            true
+        } else {
+            error!(
+                "Failed to compile code for intrinsics\n\nstdout:\n{}\n\nstderr:\n{}",
+                std::str::from_utf8(&output.stdout).unwrap_or(""),
+                std::str::from_utf8(&output.stderr).unwrap_or("")
+            );
+            false
+        }
+    } else {
+        error!("Command failed: {:#?}", output);
+        false
+    }
+}
+
+fn main() {
+    pretty_env_logger::init();
+
+    let matches = App::new("Intrinsic test tool")
+        .about("Generates Rust and C programs for intrinsics and compares the output")
+        .arg(
+            Arg::with_name("INPUT")
+                .help("The input file containing the intrinsics")
+                .required(true)
+                .index(1),
+        )
+        .arg(
+            Arg::with_name("TOOLCHAIN")
+                .takes_value(true)
+                .default_value("nightly")
+                .long("toolchain")
+                .help("The rust toolchain to use for building the rust code"),
+        )
+        .arg(
+            Arg::with_name("CPPCOMPILER")
+                .takes_value(true)
+                .default_value("clang++")
+                .long("cppcompiler")
+                .help("The C++ compiler to use for compiling the c++ code"),
+        )
+        .get_matches();
+
+    let filename = matches.value_of("INPUT").unwrap();
+    let toolchain = matches.value_of("TOOLCHAIN").unwrap();
+    let cpp_compiler = matches.value_of("CPPCOMPILER").unwrap();
+
+    let mut csv_reader = csv::Reader::from_reader(std::fs::File::open(filename).unwrap());
+
+    let mut intrinsics = csv_reader
+        .deserialize()
+        .filter_map(|x| -> Option<Intrinsic> {
+            debug!("Processing {:#?}", x);
+            match x {
+                Ok(a) => Some(a),
+                e => {
+                    error!("{:#?}", e);
+                    None
+                }
+            }
+        })
+        // Only perform the test for intrinsics that are enabled...
+        .filter(|i| i.enabled)
+        // Not sure how we would compare intrinsic that returns void.
+        .filter(|i| i.results.kind() != TypeKind::Void)
+        .filter(|i| i.results.kind() != TypeKind::BFloat)
+        .filter(|i| !(i.results.kind() == TypeKind::Float && i.results.inner_size() == 16))
+        .filter(|i| {
+            i.arguments
+                .iter()
+                .find(|a| a.ty.kind() == TypeKind::BFloat)
+                .is_none()
+        })
+        .filter(|i| {
+            i.arguments
+                .iter()
+                .find(|a| a.ty.kind() == TypeKind::Float && a.ty.inner_size() == 16)
+                .is_none()
+        })
+        // Skip pointers for now, we would probably need to look at the return
+        // type to work out how many elements we need to point to.
+        .filter(|i| i.arguments.iter().find(|a| a.is_ptr()).is_none())
+        // intrinsics with a lane parameter have constraints, deal with them later.
+        .filter(|i| {
+            i.arguments
+                .iter()
+                .find(|a| a.name.starts_with("lane"))
+                .is_none()
+        })
+        .filter(|i| i.arguments.iter().find(|a| a.name == "n").is_none())
+        .collect::<Vec<_>>();
+    intrinsics.dedup();
+
+    if !build_c(&intrinsics, cpp_compiler) {
+        std::process::exit(2);
+    }
+
+    if !build_rust(&intrinsics, &toolchain) {
+        std::process::exit(3);
+    }
+
+    if !compare_outputs(&intrinsics, &toolchain) {
+        std::process::exit(1)
+    }
+}
+
+enum FailureReason {
+    RunC(String),
+    RunRust(String),
+    Difference(String, String, String),
+}
+
+fn compare_outputs(intrinsics: &Vec<Intrinsic>, toolchain: &str) -> bool {
+    let intrinsics = intrinsics
+        .par_iter()
+        .filter_map(|intrinsic| {
+            let c = Command::new("sh")
+                .arg("-c")
+                .arg(format!(
+                    "./c_programs/{intrinsic}",
+                    intrinsic = intrinsic.name,
+                ))
+                .output();
+            let rust = Command::new("sh")
+                .current_dir("rust_programs")
+                .arg("-c")
+                .arg(format!(
+                    "cargo +{toolchain} run --release --bin {intrinsic}",
+                    intrinsic = intrinsic.name,
+                    toolchain = toolchain,
+                ))
+                .output();
+
+            let (c, rust) = match (c, rust) {
+                (Ok(c), Ok(rust)) => (c, rust),
+                a => panic!("{:#?}", a),
+            };
+
+            if !c.status.success() {
+                error!("Failed to run C program for intrinsic {}", intrinsic.name);
+                return Some(FailureReason::RunC(intrinsic.name.clone()));
+            }
+
+            if !rust.status.success() {
+                error!(
+                    "Failed to run rust program for intrinsic {}",
+                    intrinsic.name
+                );
+                return Some(FailureReason::RunRust(intrinsic.name.clone()));
+            }
+
+            info!("Comparing intrinsic: {}", intrinsic.name);
+
+            let c = std::str::from_utf8(&c.stdout)
+                .unwrap()
+                .to_lowercase()
+                .replace("-nan", "nan");
+            let rust = std::str::from_utf8(&rust.stdout)
+                .unwrap()
+                .to_lowercase()
+                .replace("-nan", "nan");
+
+            if c == rust {
+                None
+            } else {
+                Some(FailureReason::Difference(intrinsic.name.clone(), c, rust))
+            }
+        })
+        .collect::<Vec<_>>();
+
+    intrinsics.iter().for_each(|reason| match reason {
+        FailureReason::Difference(intrinsic, c, rust) => {
+            println!("Difference for intrinsic: {}", intrinsic);
+            let diff = diff::lines(c, rust);
+            diff.iter().for_each(|diff| match diff {
+                diff::Result::Left(c) => println!("C: {}", c),
+                diff::Result::Right(rust) => println!("Rust: {}", rust),
+                diff::Result::Both(_, _) => (),
+            });
+            println!("****************************************************************");
+        }
+        FailureReason::RunC(intrinsic) => {
+            println!("Failed to run C program for intrinsic {}", intrinsic)
+        }
+        FailureReason::RunRust(intrinsic) => {
+            println!("Failed to run rust program for intrinsic {}", intrinsic)
+        }
+    });
+    println!("{} differences found", intrinsics.len());
+    intrinsics.is_empty()
+}
diff --git a/crates/intrinsic-test/src/types.rs b/crates/intrinsic-test/src/types.rs
new file mode 100644
index 0000000000..c037ed3ae5
--- /dev/null
+++ b/crates/intrinsic-test/src/types.rs
@@ -0,0 +1,483 @@
+use regex::Regex;
+use serde::{Deserialize, Deserializer};
+use std::fmt;
+use std::str::FromStr;
+
+use crate::values::values_for_pass;
+use crate::Language;
+
+#[derive(Debug, PartialEq, Copy, Clone)]
+pub enum TypeKind {
+    BFloat,
+    Float,
+    Int,
+    UInt,
+    Poly,
+    Void,
+}
+
+impl FromStr for TypeKind {
+    type Err = String;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "bfloat" => Ok(Self::BFloat),
+            "float" => Ok(Self::Float),
+            "int" => Ok(Self::Int),
+            "poly" => Ok(Self::Poly),
+            "uint" | "unsigned" => Ok(Self::UInt),
+            "void" => Ok(Self::Void),
+            _ => Err(format!("Impossible to parse argument kind {}", s)),
+        }
+    }
+}
+
+impl fmt::Display for TypeKind {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "{}",
+            match self {
+                Self::BFloat => "bfloat",
+                Self::Float => "float",
+                Self::Int => "int",
+                Self::UInt => "uint",
+                Self::Poly => "poly",
+                Self::Void => "void",
+            }
+        )
+    }
+}
+
+impl TypeKind {
+    /// Gets the type part of a c typedef for a type that's in the form of {type}{size}_t.
+    pub fn c_prefix(&self) -> &str {
+        match self {
+            Self::Float => "float",
+            Self::Int => "int",
+            Self::UInt => "uint",
+            Self::Poly => "poly",
+            _ => unreachable!("Not used: {:#?}", self),
+        }
+    }
+
+    /// Gets the rust prefix for the type kind i.e. i, u, f.
+    pub fn rust_prefix(&self) -> &str {
+        match self {
+            Self::Float => "f",
+            Self::Int => "i",
+            Self::UInt => "u",
+            Self::Poly => "u",
+            _ => unreachable!("Unused type kind: {:#?}", self),
+        }
+    }
+}
+
+#[derive(Debug, PartialEq, Clone)]
+pub enum IntrinsicType {
+    Ptr {
+        constant: bool,
+        child: Box<IntrinsicType>,
+    },
+    Type {
+        constant: bool,
+        kind: TypeKind,
+        /// The bit length of this type (e.g. 32 for u32).
+        bit_len: Option<u32>,
+
+        /// Length of the SIMD vector (i.e. 4 for uint32x4_t), A value of `None`
+        /// means this is not a simd type. A `None` can be assumed to be 1,
+        /// although in some places a distinction is needed between `u64` and
+        /// `uint64x1_t` this signals that.
+        simd_len: Option<u32>,
+
+        /// The number of rows for SIMD matrices (i.e. 2 for uint8x8x2_t).
+        /// A value of `None` represents a type that does not contain any
+        /// rows encoded in the type (e.g. uint8x8_t).
+        /// A value of `None` can be assumed to be 1 though.
+        vec_len: Option<u32>,
+    },
+}
+
+impl IntrinsicType {
+    /// Get the TypeKind for this type, recursing into pointers.
+    pub fn kind(&self) -> TypeKind {
+        match *self {
+            IntrinsicType::Ptr { ref child, .. } => child.kind(),
+            IntrinsicType::Type { kind, .. } => kind,
+        }
+    }
+
+    /// Get the size of a single element inside this type, recursing into
+    /// pointers, i.e. a pointer to a u16 would be 16 rather than the size
+    /// of a pointer.
+    pub fn inner_size(&self) -> u32 {
+        match *self {
+            IntrinsicType::Ptr { ref child, .. } => child.inner_size(),
+            IntrinsicType::Type {
+                bit_len: Some(bl), ..
+            } => bl,
+            _ => unreachable!(""),
+        }
+    }
+
+    pub fn num_lanes(&self) -> u32 {
+        match *self {
+            IntrinsicType::Ptr { ref child, .. } => child.num_lanes(),
+            IntrinsicType::Type {
+                simd_len: Some(sl), ..
+            } => sl,
+            _ => 1,
+        }
+    }
+
+    /// Determine if the type is a simd type, this will treat a type such as
+    /// `uint64x1` as simd.
+    pub fn is_simd(&self) -> bool {
+        match *self {
+            IntrinsicType::Ptr { ref child, .. } => child.is_simd(),
+            IntrinsicType::Type {
+                simd_len: None,
+                vec_len: None,
+                ..
+            } => false,
+            _ => true,
+        }
+    }
+
+    pub fn is_ptr(&self) -> bool {
+        match *self {
+            IntrinsicType::Ptr { .. } => true,
+            IntrinsicType::Type { .. } => false,
+        }
+    }
+
+    pub fn from_rust(ty: &str) -> Result<Self, String> {
+        lazy_static! {
+            static ref SIMD_TYPE: Regex = Regex::new(r#"([a-z]*)([0-9]*)x([0-9]*)_t"#).unwrap();
+            static ref MULTI_SIMD_TYPE: Regex =
+                Regex::new(r#"([a-z]*)([0-9]*)x([0-9]*)x([0-9]*)_t"#).unwrap();
+            static ref RUST_TYPE: Regex = Regex::new(r#"([iuf]|float|poly)([0-9]+)"#).unwrap();
+        }
+
+        debug!("Parsing type: {}", ty);
+
+        if let Some(ty) = ty.strip_prefix('*') {
+            let (constant, ty) = if let Some(ty) = ty.strip_prefix("const") {
+                (true, ty.trim())
+            } else if let Some(ty) = ty.strip_prefix("mut") {
+                (false, ty.trim())
+            } else {
+                (false, ty)
+            };
+            return Ok(Self::Ptr {
+                constant,
+                child: Box::new(Self::from_rust(ty)?),
+            });
+        }
+
+        let (constant, ty) = if let Some(ty) = ty.strip_prefix("const") {
+            (true, ty.trim())
+        } else {
+            (false, ty)
+        };
+
+        if let Some(captures) = MULTI_SIMD_TYPE.captures(ty) {
+            let kind = captures
+                .get(1)
+                .map(|s| s.as_str().parse::<TypeKind>().unwrap())
+                .unwrap();
+            let bit_len = captures.get(2).map(|s| s.as_str().parse::<u32>().unwrap());
+            let simd_len = captures.get(3).map(|s| s.as_str().parse::<u32>().unwrap());
+            let vec_len = captures.get(4).map(|s| s.as_str().parse::<u32>().unwrap());
+            Ok(Self::Type {
+                constant,
+                kind,
+                bit_len,
+                simd_len,
+                vec_len,
+            })
+        } else if let Some(captures) = SIMD_TYPE.captures(ty) {
+            let kind = captures
+                .get(1)
+                .map(|s| s.as_str().parse::<TypeKind>().unwrap())
+                .unwrap();
+            let bit_len = captures.get(2).map(|s| s.as_str().parse::<u32>().unwrap());
+            let simd_len = captures.get(3).map(|s| s.as_str().parse::<u32>().unwrap());
+
+            Ok(Self::Type {
+                constant,
+                kind,
+                bit_len,
+                simd_len,
+                vec_len: None,
+            })
+        } else if let Some(captures) = RUST_TYPE.captures(ty) {
+            let kind = captures
+                .get(1)
+                .map(|s| match s.as_str() {
+                    "i" => TypeKind::Int,
+                    "u" => TypeKind::UInt,
+                    "f" => TypeKind::Float,
+                    "float" => TypeKind::Float,
+                    "poly" => TypeKind::Poly,
+                    a => panic!("Unexpected type: {} found", a),
+                })
+                .unwrap();
+            let bit_len = captures.get(2).map(|s| s.as_str().parse::<u32>().unwrap());
+            Ok(Self::Type {
+                constant,
+                kind,
+                bit_len,
+                simd_len: None,
+                vec_len: None,
+            })
+        } else {
+            match ty {
+                "int" => Ok(Self::Type {
+                    constant,
+                    kind: TypeKind::Int,
+                    bit_len: Some(32),
+                    simd_len: None,
+                    vec_len: None,
+                }),
+                "void" => Ok(Self::Type {
+                    constant: false,
+                    kind: TypeKind::Void,
+                    bit_len: None,
+                    simd_len: None,
+                    vec_len: None,
+                }),
+                _ => Err(format!("Failed to parse type: {}", ty)),
+            }
+        }
+    }
+
+    #[allow(unused)]
+    fn c_scalar_type(&self) -> String {
+        format!(
+            "{prefix}{bits}_t",
+            prefix = self.kind().c_prefix(),
+            bits = self.inner_size()
+        )
+    }
+
+    fn rust_scalar_type(&self) -> String {
+        format!(
+            "{prefix}{bits}",
+            prefix = self.kind().rust_prefix(),
+            bits = self.inner_size()
+        )
+    }
+
+    /// Gets a string containing the typename for this type in C format.
+    pub fn c_type(&self) -> String {
+        match self {
+            IntrinsicType::Ptr { child, .. } => child.c_type(),
+            IntrinsicType::Type {
+                constant,
+                kind,
+                bit_len: Some(bit_len),
+                simd_len: None,
+                vec_len: None,
+                ..
+            } => format!(
+                "{}{}{}_t",
+                if *constant { "const " } else { "" },
+                kind.c_prefix(),
+                bit_len
+            ),
+            IntrinsicType::Type {
+                kind,
+                bit_len: Some(bit_len),
+                simd_len: Some(simd_len),
+                vec_len: None,
+                ..
+            } => format!("{}{}x{}_t", kind.c_prefix(), bit_len, simd_len),
+            IntrinsicType::Type {
+                kind,
+                bit_len: Some(bit_len),
+                simd_len: Some(simd_len),
+                vec_len: Some(vec_len),
+                ..
+            } => format!("{}{}x{}x{}_t", kind.c_prefix(), bit_len, simd_len, vec_len),
+            _ => todo!("{:#?}", self),
+        }
+    }
+
+    /// Gets a cast for this type if needs promotion.
+    /// This is required for 8 bit types due to printing as the 8 bit types use
+    /// a char and when using that in `std::cout` it will print as a character,
+    /// which means value of 0 will be printed as a null byte.
+    pub fn c_promotion(&self) -> &str {
+        match *self {
+            IntrinsicType::Type {
+                kind,
+                bit_len: Some(bit_len),
+                ..
+            } if bit_len == 8 => match kind {
+                TypeKind::Int => "(int)",
+                TypeKind::UInt => "(unsigned int)",
+                TypeKind::Poly => "(unsigned int)",
+                _ => "",
+            },
+            _ => "",
+        }
+    }
+
+    /// Generates a comma list of values that can be used to initialize an
+    /// argument for the intrinsic call.
+    /// This is determistic based on the pass number.
+    ///
+    /// * `pass`: The pass index, i.e. the iteration index for the call to an intrinsic
+    ///
+    /// Returns a string such as
+    /// * `0x1, 0x7F, 0xFF` if `language` is `Language::C`
+    /// * `0x1 as _, 0x7F as _, 0xFF as _` if `language` is `Language::Rust`
+    pub fn populate_random(&self, pass: usize, language: &Language) -> String {
+        match self {
+            IntrinsicType::Ptr { child, .. } => child.populate_random(pass, language),
+            IntrinsicType::Type {
+                bit_len: Some(bit_len),
+                kind,
+                simd_len,
+                vec_len,
+                ..
+            } if kind == &TypeKind::Int || kind == &TypeKind::UInt || kind == &TypeKind::Poly => (0
+                ..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1)))
+                .map(|i| {
+                    format!(
+                        "{}{}",
+                        values_for_pass(*bit_len, i, pass),
+                        match language {
+                            &Language::Rust => format!(" as {ty} ", ty = self.rust_scalar_type()),
+                            &Language::C => String::from(""),
+                        }
+                    )
+                })
+                .collect::<Vec<_>>()
+                .join(","),
+            IntrinsicType::Type {
+                kind: TypeKind::Float,
+                bit_len: Some(32),
+                simd_len,
+                vec_len,
+                ..
+            } => (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1)))
+                .map(|i| {
+                    format!(
+                        "{}{})",
+                        match language {
+                            &Language::Rust => "bits_to_float!(f32, ",
+                            &Language::C => "cast<float, uint32_t>(",
+                        },
+                        values_for_pass(32, i, pass),
+                    )
+                })
+                .collect::<Vec<_>>()
+                .join(","),
+            IntrinsicType::Type {
+                kind: TypeKind::Float,
+                bit_len: Some(64),
+                simd_len,
+                vec_len,
+                ..
+            } => (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1)))
+                .map(|i| {
+                    format!(
+                        "{}{}{})",
+                        match language {
+                            &Language::Rust => "bits_to_float!(f64,",
+                            &Language::C => "cast<double, uint64_t>(",
+                        },
+                        values_for_pass(64, i, pass),
+                        match language {
+                            &Language::Rust => " as u64",
+                            &Language::C => "",
+                        }
+                    )
+                })
+                .collect::<Vec<_>>()
+                .join(","),
+            _ => unreachable!("populate random: {:#?}", self),
+        }
+    }
+
+    /// Determines the load function for this type.
+    #[allow(unused)]
+    pub fn get_load_function(&self) -> String {
+        match self {
+            IntrinsicType::Ptr { child, .. } => child.get_load_function(),
+            IntrinsicType::Type {
+                kind: k,
+                bit_len: Some(bl),
+                simd_len,
+                vec_len,
+                ..
+            } => {
+                let quad = if (simd_len.unwrap_or(1) * bl) > 64 {
+                    "q"
+                } else {
+                    ""
+                };
+                format!(
+                    "vld{len}{quad}_{type}{size}",
+                    type = match k {
+                        TypeKind::UInt => "u",
+                        TypeKind::Int => "s",
+                        TypeKind::Float => "f",
+                        TypeKind::Poly => "p",
+                        x => todo!("get_load_function TypeKind: {:#?}", x),
+                    },
+                    size = bl,
+                    quad = quad,
+                    len = vec_len.unwrap_or(1),
+                )
+            }
+            _ => todo!("get_load_function IntrinsicType: {:#?}", self),
+        }
+    }
+
+    /// Determines the get lane function for this type.
+    pub fn get_lane_function(&self) -> String {
+        match self {
+            IntrinsicType::Ptr { child, .. } => child.get_lane_function(),
+            IntrinsicType::Type {
+                kind: k,
+                bit_len: Some(bl),
+                simd_len,
+                ..
+            } => {
+                let quad = if (simd_len.unwrap_or(1) * bl) > 64 {
+                    "q"
+                } else {
+                    ""
+                };
+                format!(
+                    "vget{quad}_lane_{type}{size}",
+                    type = match k {
+                        TypeKind::UInt => "u",
+                        TypeKind::Int => "s",
+                        TypeKind::Float => "f",
+                        TypeKind::Poly => "p",
+                        x => todo!("get_load_function TypeKind: {:#?}", x),
+                    },
+                    size = bl,
+                    quad = quad,
+                )
+            }
+            _ => todo!("get_lane_function IntrinsicType: {:#?}", self),
+        }
+    }
+}
+
+impl<'de> Deserialize<'de> for IntrinsicType {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        use serde::de::Error;
+        let s = String::deserialize(deserializer)?;
+        Self::from_rust(&s).map_err(Error::custom)
+    }
+}
diff --git a/crates/intrinsic-test/src/values.rs b/crates/intrinsic-test/src/values.rs
new file mode 100644
index 0000000000..4565edca09
--- /dev/null
+++ b/crates/intrinsic-test/src/values.rs
@@ -0,0 +1,126 @@
+/// Gets a hex constant value for a single lane in in a determistic way
+/// * `bits`: The number of bits for the type, only 8, 16, 32, 64 are valid values
+/// * `simd`: The index of the simd lane we are generating for
+/// * `pass`: The index of the pass we are generating the values for
+pub fn values_for_pass(bits: u32, simd: u32, pass: usize) -> String {
+    let index = pass + (simd as usize);
+
+    if bits == 8 {
+        format!("{:#X}", VALUES_8[index % VALUES_8.len()])
+    } else if bits == 16 {
+        format!("{:#X}", VALUES_16[index % VALUES_16.len()])
+    } else if bits == 32 {
+        format!("{:#X}", VALUES_32[index % VALUES_32.len()])
+    } else if bits == 64 {
+        format!("{:#X}", VALUES_64[index % VALUES_64.len()])
+    } else {
+        panic!("Unknown size: {}", bits);
+    }
+}
+
+pub const VALUES_8: &[u8] = &[
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0xf0, 0x80, 0x3b, 0xff,
+];
+
+pub const VALUES_16: &[u16] = &[
+    0x0000, // 0.0
+    0x0400, // The smallest normal value.
+    0x37ff, // The value just below 0.5.
+    0x3800, // 0.5
+    0x3801, // The value just above 0.5.
+    0x3bff, // The value just below 1.0.
+    0x3c00, // 1.0
+    0x3c01, // The value just above 1.0.
+    0x3e00, // 1.5
+    0x4900, // 10
+    0x7bff, // The largest finite value.
+    0x7c00, // Infinity.
+    // NaNs.
+    //  - Quiet NaNs
+    0x7f23, 0x7e00, //  - Signalling NaNs
+    0x7d23, 0x7c01, // Subnormals.
+    //  - A recognisable bit pattern.
+    0x0012, //  - The largest subnormal value.
+    0x03ff, //  - The smallest subnormal value.
+    0x0001, // The same values again, but negated.
+    0x8000, 0x8400, 0xb7ff, 0xb800, 0xb801, 0xbbff, 0xbc00, 0xbc01, 0xbe00, 0xc900, 0xfbff, 0xfc00,
+    0xff23, 0xfe00, 0xfd23, 0xfc01, 0x8012, 0x83ff, 0x8001,
+];
+
+pub const VALUES_32: &[u32] = &[
+    // Simple values.
+    0x00000000, // 0.0
+    0x00800000, // The smallest normal value.
+    0x3effffff, // The value just below 0.5.
+    0x3f000000, // 0.5
+    0x3f000001, // The value just above 0.5.
+    0x3f7fffff, // The value just below 1.0.
+    0x3f800000, // 1.0
+    0x3f800001, // The value just above 1.0.
+    0x3fc00000, // 1.5
+    0x41200000, // 10
+    0x7f8fffff, // The largest finite value.
+    0x7f800000, // Infinity.
+    // NaNs.
+    //  - Quiet NaNs
+    0x7fd23456, 0x7fc00000, //  - Signalling NaNs
+    0x7f923456, 0x7f800001, // Subnormals.
+    //  - A recognisable bit pattern.
+    0x00123456, //  - The largest subnormal value.
+    0x007fffff, //  - The smallest subnormal value.
+    0x00000001, // The same values again, but negated.
+    0x80000000, 0x80800000, 0xbeffffff, 0xbf000000, 0xbf000001, 0xbf7fffff, 0xbf800000, 0xbf800001,
+    0xbfc00000, 0xc1200000, 0xff8fffff, 0xff800000, 0xffd23456, 0xffc00000, 0xff923456, 0xff800001,
+    0x80123456, 0x807fffff, 0x80000001,
+];
+
+pub const VALUES_64: &[u64] = &[
+    // Simple values.
+    0x0000000000000000, // 0.0
+    0x0010000000000000, // The smallest normal value.
+    0x3fdfffffffffffff, // The value just below 0.5.
+    0x3fe0000000000000, // 0.5
+    0x3fe0000000000001, // The value just above 0.5.
+    0x3fefffffffffffff, // The value just below 1.0.
+    0x3ff0000000000000, // 1.0
+    0x3ff0000000000001, // The value just above 1.0.
+    0x3ff8000000000000, // 1.5
+    0x4024000000000000, // 10
+    0x7fefffffffffffff, // The largest finite value.
+    0x7ff0000000000000, // Infinity.
+    // NaNs.
+    //  - Quiet NaNs
+    0x7ff923456789abcd,
+    0x7ff8000000000000,
+    //  - Signalling NaNs
+    0x7ff123456789abcd,
+    0x7ff0000000000000,
+    // Subnormals.
+    //  - A recognisable bit pattern.
+    0x000123456789abcd,
+    //  - The largest subnormal value.
+    0x000fffffffffffff,
+    //  - The smallest subnormal value.
+    0x0000000000000001,
+    // The same values again, but negated.
+    0x8000000000000000,
+    0x8010000000000000,
+    0xbfdfffffffffffff,
+    0xbfe0000000000000,
+    0xbfe0000000000001,
+    0xbfefffffffffffff,
+    0xbff0000000000000,
+    0xbff0000000000001,
+    0xbff8000000000000,
+    0xc024000000000000,
+    0xffefffffffffffff,
+    0xfff0000000000000,
+    0xfff923456789abcd,
+    0xfff8000000000000,
+    0xfff123456789abcd,
+    0xfff0000000000000,
+    0x800123456789abcd,
+    0x800fffffffffffff,
+    0x8000000000000001,
+];

From a6cdf1c5121346fa541fcc27457aad889e620e28 Mon Sep 17 00:00:00 2001
From: Jamie Cunliffe <Jamie.Cunliffe@arm.com>
Date: Mon, 7 Jun 2021 16:50:05 +0100
Subject: [PATCH 2/4] Add the intrinsic test tool to the CI

---
 .../aarch64-unknown-linux-gnu/Dockerfile      |    6 +-
 ci/run-docker.sh                              |    4 +-
 ci/run.sh                                     |    7 +
 crates/intrinsic-test/README.md               |    3 +-
 crates/intrinsic-test/neon-intrinsics.csv     | 4356 +++++++++++++++++
 crates/intrinsic-test/src/main.rs             |   38 +-
 crates/intrinsic-test/src/types.rs            |   12 +-
 7 files changed, 4402 insertions(+), 24 deletions(-)
 create mode 100644 crates/intrinsic-test/neon-intrinsics.csv

diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
index 09ead2c017..f0c1493ab8 100644
--- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
@@ -1,13 +1,17 @@
 FROM ubuntu:20.04
 RUN apt-get update && apt-get install -y --no-install-recommends \
   gcc \
+  g++ \
   ca-certificates \
   libc6-dev \
   gcc-aarch64-linux-gnu \
+  g++-aarch64-linux-gnu \
   libc6-dev-arm64-cross \
   qemu-user \
   make \
-  file
+  file \
+  clang-9 \
+  lld
 
 ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
     CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -L /usr/aarch64-linux-gnu" \
diff --git a/ci/run-docker.sh b/ci/run-docker.sh
index ec9393adf2..fb47b752d5 100755
--- a/ci/run-docker.sh
+++ b/ci/run-docker.sh
@@ -9,7 +9,7 @@ run() {
     target=$(echo "${1}" | sed 's/-emulated//')
     echo "Building docker container for TARGET=${1}"
     docker build -t stdarch -f "ci/docker/${1}/Dockerfile" ci/
-    mkdir -p target
+    mkdir -p target c_programs rust_programs
     echo "Running docker"
     # shellcheck disable=SC2016
     docker run \
@@ -29,6 +29,8 @@ run() {
       --volume "$(rustc --print sysroot)":/rust:ro \
       --volume "$(pwd)":/checkout:ro \
       --volume "$(pwd)"/target:/checkout/target \
+      --volume "$(pwd)"/c_programs:/checkout/c_programs \
+      --volume "$(pwd)"/rust_programs:/checkout/rust_programs \
       --init \
       --workdir /checkout \
       --privileged \
diff --git a/ci/run.sh b/ci/run.sh
index 1766a37ca3..cd7a04d23f 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -65,6 +65,8 @@ cargo_test() {
 CORE_ARCH="--manifest-path=crates/core_arch/Cargo.toml"
 STD_DETECT="--manifest-path=crates/std_detect/Cargo.toml"
 STDARCH_EXAMPLES="--manifest-path=examples/Cargo.toml"
+INTRINSIC_TEST="--manifest-path=crates/intrinsic-test/Cargo.toml"
+
 cargo_test "${CORE_ARCH} --release"
 
 if [ "$NOSTD" != "1" ]; then
@@ -111,6 +113,11 @@ case ${TARGET} in
 
 esac
 
+if [ "${TARGET}" = "aarch64-unknown-linux-gnu" ]; then
+    export CPPFLAGS="-fuse-ld=lld -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/9/aarch64-linux-gnu/"
+    cargo run ${INTRINSIC_TEST} --release --bin intrinsic-test -- crates/intrinsic-test/neon-intrinsics.csv --runner "${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}" --cppcompiler "clang++-9"
+fi
+
 if [ "$NORUN" != "1" ] && [ "$NOSTD" != 1 ]; then
     # Test examples
     (
diff --git a/crates/intrinsic-test/README.md b/crates/intrinsic-test/README.md
index 0f60aaa040..8a8ddab404 100644
--- a/crates/intrinsic-test/README.md
+++ b/crates/intrinsic-test/README.md
@@ -12,7 +12,8 @@ FLAGS:
 
 OPTIONS:
         --cppcompiler <CPPCOMPILER>    The C++ compiler to use for compiling the c++ code [default: clang++]
-        --toolchain <TOOLCHAIN>        The rust toolchain to use for building the rust code [default: nightly]
+        --runner <RUNNER>              Run the C programs under emulation with this command
+        --toolchain <TOOLCHAIN>        The rust toolchain to use for building the rust code
 
 ARGS:
     <INPUT>    The input file containing the intrinsics
diff --git a/crates/intrinsic-test/neon-intrinsics.csv b/crates/intrinsic-test/neon-intrinsics.csv
new file mode 100644
index 0000000000..438c6667f5
--- /dev/null
+++ b/crates/intrinsic-test/neon-intrinsics.csv
@@ -0,0 +1,4356 @@
+enabled,name,args,return,comment
+FALSE,__crc32b,"a: u32, b: u8",u32,CRC32 checksum
+FALSE,__crc32cb,"a: u32, b: u8",u32,CRC32 checksum
+TRUE,__crc32cd,"a: u32, b: u64",u32,CRC32 checksum
+FALSE,__crc32ch,"a: u32, b: u16",u32,CRC32 checksum
+FALSE,__crc32cw,"a: u32, b: u32",u32,CRC32 checksum
+TRUE,__crc32d,"a: u32, b: u64",u32,CRC32 checksum
+FALSE,__crc32h,"a: u32, b: u16",u32,CRC32 checksum
+FALSE,__crc32w,"a: u32, b: u32",u32,CRC32 checksum
+TRUE,vaba_s16,"a: int16x4_t, b: int16x4_t, c: int16x4_t",int16x4_t,Signed absolute difference and accumulate
+TRUE,vaba_s32,"a: int32x2_t, b: int32x2_t, c: int32x2_t",int32x2_t,Signed absolute difference and accumulate
+TRUE,vaba_s8,"a: int8x8_t, b: int8x8_t, c: int8x8_t",int8x8_t,Signed absolute difference and accumulate
+TRUE,vaba_u16,"a: uint16x4_t, b: uint16x4_t, c: uint16x4_t",uint16x4_t,Unsigned absolute difference and accumulate
+TRUE,vaba_u32,"a: uint32x2_t, b: uint32x2_t, c: uint32x2_t",uint32x2_t,Unsigned absolute difference and accumulate
+TRUE,vaba_u8,"a: uint8x8_t, b: uint8x8_t, c: uint8x8_t",uint8x8_t,Unsigned absolute difference and accumulate
+TRUE,vabal_high_s16,"a: int32x4_t, b: int16x8_t, c: int16x8_t",int32x4_t,Signed absolute difference and accumulate long
+TRUE,vabal_high_s32,"a: int64x2_t, b: int32x4_t, c: int32x4_t",int64x2_t,Signed absolute difference and accumulate long
+TRUE,vabal_high_s8,"a: int16x8_t, b: int8x16_t, c: int8x16_t",int16x8_t,Signed absolute difference and accumulate long
+TRUE,vabal_high_u16,"a: uint32x4_t, b: uint16x8_t, c: uint16x8_t",uint32x4_t,Unsigned absolute difference and accumulate long
+TRUE,vabal_high_u32,"a: uint64x2_t, b: uint32x4_t, c: uint32x4_t",uint64x2_t,Unsigned absolute difference and accumulate long
+TRUE,vabal_high_u8,"a: uint16x8_t, b: uint8x16_t, c: uint8x16_t",uint16x8_t,Unsigned absolute difference and accumulate long
+TRUE,vabal_s16,"a: int32x4_t, b: int16x4_t, c: int16x4_t",int32x4_t,Signed absolute difference and accumulate long
+TRUE,vabal_s32,"a: int64x2_t, b: int32x2_t, c: int32x2_t",int64x2_t,Signed absolute difference and accumulate long
+TRUE,vabal_s8,"a: int16x8_t, b: int8x8_t, c: int8x8_t",int16x8_t,Signed absolute difference and accumulate long
+TRUE,vabal_u16,"a: uint32x4_t, b: uint16x4_t, c: uint16x4_t",uint32x4_t,Unsigned absolute difference and accumulate long
+TRUE,vabal_u32,"a: uint64x2_t, b: uint32x2_t, c: uint32x2_t",uint64x2_t,Unsigned absolute difference and accumulate long
+TRUE,vabal_u8,"a: uint16x8_t, b: uint8x8_t, c: uint8x8_t",uint16x8_t,Unsigned absolute difference and accumulate long
+TRUE,vabaq_s16,"a: int16x8_t, b: int16x8_t, c: int16x8_t",int16x8_t,Signed absolute difference and accumulate
+TRUE,vabaq_s32,"a: int32x4_t, b: int32x4_t, c: int32x4_t",int32x4_t,Signed absolute difference and accumulate
+TRUE,vabaq_s8,"a: int8x16_t, b: int8x16_t, c: int8x16_t",int8x16_t,Signed absolute difference and accumulate
+TRUE,vabaq_u16,"a: uint16x8_t, b: uint16x8_t, c: uint16x8_t",uint16x8_t,Unsigned absolute difference and accumulate
+TRUE,vabaq_u32,"a: uint32x4_t, b: uint32x4_t, c: uint32x4_t",uint32x4_t,Unsigned absolute difference and accumulate
+TRUE,vabaq_u8,"a: uint8x16_t, b: uint8x16_t, c: uint8x16_t",uint8x16_t,Unsigned absolute difference and accumulate
+FALSE,vabd_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point absolute difference
+TRUE,vabd_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point absolute difference
+TRUE,vabd_f64,"a: float64x1_t, b: float64x1_t",float64x1_t,Floating-point absolute difference
+TRUE,vabd_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Signed absolute difference
+TRUE,vabd_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Signed absolute difference
+TRUE,vabd_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Signed absolute difference
+TRUE,vabd_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Unsigned absolute difference
+TRUE,vabd_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Unsigned absolute difference
+TRUE,vabd_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Unsigned absolute difference
+FALSE,vabdd_f64,"a: float64_t, b: float64_t",float64_t,Floating-point absolute difference
+FALSE,vabdh_f16,"a: float16_t, b: float16_t",float16_t,Floating-point absolute difference
+TRUE,vabdl_high_s16,"a: int16x8_t, b: int16x8_t",int32x4_t,Signed absolute difference long
+TRUE,vabdl_high_s32,"a: int32x4_t, b: int32x4_t",int64x2_t,Signed absolute difference long
+TRUE,vabdl_high_s8,"a: int8x16_t, b: int8x16_t",int16x8_t,Signed absolute difference long
+TRUE,vabdl_high_u16,"a: uint16x8_t, b: uint16x8_t",uint32x4_t,Unsigned absolute difference long
+TRUE,vabdl_high_u32,"a: uint32x4_t, b: uint32x4_t",uint64x2_t,Unsigned absolute difference long
+TRUE,vabdl_high_u8,"a: uint8x16_t, b: uint8x16_t",uint16x8_t,Unsigned absolute difference long
+TRUE,vabdl_s16,"a: int16x4_t, b: int16x4_t",int32x4_t,Signed absolute difference long
+TRUE,vabdl_s32,"a: int32x2_t, b: int32x2_t",int64x2_t,Signed absolute difference long
+TRUE,vabdl_s8,"a: int8x8_t, b: int8x8_t",int16x8_t,Signed absolute difference long
+TRUE,vabdl_u16,"a: uint16x4_t, b: uint16x4_t",uint32x4_t,Unsigned absolute difference long
+TRUE,vabdl_u32,"a: uint32x2_t, b: uint32x2_t",uint64x2_t,Unsigned absolute difference long
+TRUE,vabdl_u8,"a: uint8x8_t, b: uint8x8_t",uint16x8_t,Unsigned absolute difference long
+FALSE,vabdq_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point absolute difference
+TRUE,vabdq_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point absolute difference
+TRUE,vabdq_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point absolute difference
+TRUE,vabdq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Signed absolute difference
+TRUE,vabdq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Signed absolute difference
+TRUE,vabdq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,l
+TRUE,vabdq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Unsigned absolute difference
+TRUE,vabdq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Unsigned absolute difference
+TRUE,vabdq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Unsigned absolute difference
+FALSE,vabds_f32,"a: f32, b: f32",f32,Floating-point absolute difference
+FALSE,vabs_f16,a: float16x4_t,float16x4_t,Floating-point absolute value
+TRUE,vabs_f32,a: float32x2_t,float32x2_t,Floating-point absolute value
+TRUE,vabs_f64,a: float64x1_t,float64x1_t,Floating-point absolute value
+TRUE,vabs_s16,a: int16x4_t,int16x4_t,Absolute value
+TRUE,vabs_s32,a: int32x2_t,int32x2_t,Absolute value
+TRUE,vabs_s64,a: int64x1_t,int64x1_t,Absolute value
+TRUE,vabs_s8,a: int8x8_t,int8x8_t,Absolute value
+TRUE,vabsd_s64,a: i64,i64,Absolute value
+FALSE,vabsh_f16,a: float16_t,float16_t,Floating-point absolute value
+FALSE,vabsq_f16,a: float16x8_t,float16x8_t,Floating-point absolute value
+TRUE,vabsq_f32,a: float32x4_t,float32x4_t,Floating-point absolute value
+TRUE,vabsq_f64,a: float64x2_t,float64x2_t,Floating-point absolute value
+TRUE,vabsq_s16,a: int16x8_t,int16x8_t,Absolute value
+TRUE,vabsq_s32,a: int32x4_t,int32x4_t,Absolute value
+TRUE,vabsq_s64,a: int64x2_t,int64x2_t,Absolute value
+TRUE,vabsq_s8,a: int8x16_t,int8x16_t,Absolute value
+FALSE,vadd_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point add
+TRUE,vadd_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point add
+TRUE,vadd_f64,"a: float64x1_t, b: float64x1_t",float64x1_t,Floating-point add
+FALSE,vadd_p16,"a: poly16x4_t, b: poly16x4_t",poly16x4_t,Bitwise exclusive OR
+FALSE,vadd_p64,"a: poly64x1_t, b: poly64x1_t",poly64x1_t,Bitwise exclusive OR
+FALSE,vadd_p8,"a: poly8x8_t, b: poly8x8_t",poly8x8_t,Bitwise exclusive OR
+TRUE,vadd_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Add
+TRUE,vadd_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Add
+TRUE,vadd_s64,"a: int64x1_t, b: int64x1_t",int64x1_t,Add
+TRUE,vadd_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Add
+TRUE,vadd_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Add
+TRUE,vadd_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Add
+TRUE,vadd_u64,"a: uint64x1_t, b: uint64x1_t",uint64x1_t,Add
+TRUE,vadd_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Add
+TRUE,vaddd_s64,"a: i64, b: i64",i64,Add
+TRUE,vaddd_u64,"a: u64, b: u64",u64,Add
+FALSE,vaddh_f16,"a: float16_t, b: float16_t",float16_t,Floating-point add
+TRUE,vaddhn_high_s16,"r: int8x8_t, a: int16x8_t, b: int16x8_t",int8x16_t,Add returning high narrow
+TRUE,vaddhn_high_s32,"r: int16x4_t, a: int32x4_t, b: int32x4_t",int16x8_t,Add returning high narrow
+TRUE,vaddhn_high_s64,"r: int32x2_t, a: int64x2_t, b: int64x2_t",int32x4_t,Add returning high narrow
+TRUE,vaddhn_high_u16,"r: uint8x8_t, a: uint16x8_t, b: uint16x8_t",uint8x16_t,Add returning high narrow
+TRUE,vaddhn_high_u32,"r: uint16x4_t, a: uint32x4_t, b: uint32x4_t",uint16x8_t,Add returning high narrow
+TRUE,vaddhn_high_u64,"r: uint32x2_t, a: uint64x2_t, b: uint64x2_t",uint32x4_t,Add returning high narrow
+TRUE,vaddhn_s16,"a: int16x8_t, b: int16x8_t",int8x8_t,Add returning high narrow
+TRUE,vaddhn_s32,"a: int32x4_t, b: int32x4_t",int16x4_t,Add returning high narrow
+TRUE,vaddhn_s64,"a: int64x2_t, b: int64x2_t",int32x2_t,Add returning high narrow
+TRUE,vaddhn_u16,"a: uint16x8_t, b: uint16x8_t",uint8x8_t,Add returning high narrow
+TRUE,vaddhn_u32,"a: uint32x4_t, b: uint32x4_t",uint16x4_t,Add returning high narrow
+TRUE,vaddhn_u64,"a: uint64x2_t, b: uint64x2_t",uint32x2_t,Add returning high narrow
+TRUE,vaddl_high_s16,"a: int16x8_t, b: int16x8_t",int32x4_t,Signed add long
+TRUE,vaddl_high_s32,"a: int32x4_t, b: int32x4_t",int64x2_t,Signed add long
+TRUE,vaddl_high_s8,"a: int8x16_t, b: int8x16_t",int16x8_t,Signed add long
+TRUE,vaddl_high_u16,"a: uint16x8_t, b: uint16x8_t",uint32x4_t,Unsigned add long
+TRUE,vaddl_high_u32,"a: uint32x4_t, b: uint32x4_t",uint64x2_t,Unsigned add long
+TRUE,vaddl_high_u8,"a: uint8x16_t, b: uint8x16_t",uint16x8_t,Unsigned add long
+TRUE,vaddl_s16,"a: int16x4_t, b: int16x4_t",int32x4_t,Signed add long
+TRUE,vaddl_s32,"a: int32x2_t, b: int32x2_t",int64x2_t,Signed add long
+TRUE,vaddl_s8,"a: int8x8_t, b: int8x8_t",int16x8_t,Signed add long
+TRUE,vaddl_u16,"a: uint16x4_t, b: uint16x4_t",uint32x4_t,Unsigned add long
+TRUE,vaddl_u32,"a: uint32x2_t, b: uint32x2_t",uint64x2_t,Unsigned add long
+TRUE,vaddl_u8,"a: uint8x8_t, b: uint8x8_t",uint16x8_t,Unsigned add long
+FALSE,vaddlv_s16,a: int16x4_t,i32,Signed add long across vector
+FALSE,vaddlv_s32,a: int32x2_t,i64,Signed add long pairwise
+FALSE,vaddlv_s8,a: int8x8_t,i16,Signed add long across vector
+FALSE,vaddlv_u16,a: uint16x4_t,u32,Unsigned sum long across vector
+FALSE,vaddlv_u32,a: uint32x2_t,u64,Unsigned add long pairwise
+FALSE,vaddlv_u8,a: uint8x8_t,u16,Unsigned sum long across vector
+FALSE,vaddlvq_s16,a: int16x8_t,i32,Signed add long across vector
+FALSE,vaddlvq_s32,a: int32x4_t,i64,Signed add long across vector
+FALSE,vaddlvq_s8,a: int8x16_t,i16,Signed add long across vector
+FALSE,vaddlvq_u16,a: uint16x8_t,u32,Unsigned sum long across vector
+FALSE,vaddlvq_u32,a: uint32x4_t,u64,Unsigned sum long across vector
+FALSE,vaddlvq_u8,a: uint8x16_t,u16,Unsigned sum long across vector
+FALSE,vaddq_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point add
+TRUE,vaddq_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point add
+TRUE,vaddq_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point add
+FALSE,vaddq_p128,"a: poly128_t, b: poly128_t",poly128_t,Bitwise exclusive OR
+FALSE,vaddq_p16,"a: poly16x8_t, b: poly16x8_t",poly16x8_t,Bitwise exclusive OR
+FALSE,vaddq_p64,"a: poly64x2_t, b: poly64x2_t",poly64x2_t,Bitwise exclusive OR
+FALSE,vaddq_p8,"a: poly8x16_t, b: poly8x16_t",poly8x16_t,Bitwise exclusive OR
+TRUE,vaddq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Add
+TRUE,vaddq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Add
+TRUE,vaddq_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,Add
+TRUE,vaddq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Add
+TRUE,vaddq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Add
+TRUE,vaddq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Add
+TRUE,vaddq_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Add
+TRUE,vaddq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Add
+FALSE,vaddv_f32,a: float32x2_t,f32,Floating-point add across vector
+TRUE,vaddv_s16,a: int16x4_t,i16,Add across vector
+TRUE,vaddv_s32,a: int32x2_t,i32,Add across vector
+TRUE,vaddv_s8,a: int8x8_t,i8,Add across vector
+TRUE,vaddv_u16,a: uint16x4_t,u16,Add across vector
+TRUE,vaddv_u32,a: uint32x2_t,u32,Add across vector
+TRUE,vaddv_u8,a: uint8x8_t,u8,Add across vector
+FALSE,vaddvq_f32,a: float32x4_t,f32,Floating-point add across vector
+FALSE,vaddvq_f64,a: float64x2_t,float64_t,Floating-point add across vector
+TRUE,vaddvq_s16,a: int16x8_t,i16,Add across vector
+TRUE,vaddvq_s32,a: int32x4_t,i32,Add across vector
+TRUE,vaddvq_s64,a: int64x2_t,i64,Add across vector
+TRUE,vaddvq_s8,a: int8x16_t,i8,Add across vector
+TRUE,vaddvq_u16,a: uint16x8_t,u16,Add across vector
+TRUE,vaddvq_u32,a: uint32x4_t,u32,Add across vector
+TRUE,vaddvq_u64,a: uint64x2_t,u64,Add across vector
+TRUE,vaddvq_u8,a: uint8x16_t,u8,Add across vector
+TRUE,vaddw_high_s16,"a: int32x4_t, b: int16x8_t",int32x4_t,Signed add wide
+TRUE,vaddw_high_s32,"a: int64x2_t, b: int32x4_t",int64x2_t,Signed add wide
+TRUE,vaddw_high_s8,"a: int16x8_t, b: int8x16_t",int16x8_t,Signed add wide
+TRUE,vaddw_high_u16,"a: uint32x4_t, b: uint16x8_t",uint32x4_t,Unsigned add wide
+TRUE,vaddw_high_u32,"a: uint64x2_t, b: uint32x4_t",uint64x2_t,Unsigned add wide
+TRUE,vaddw_high_u8,"a: uint16x8_t, b: uint8x16_t",uint16x8_t,Unsigned add wide
+TRUE,vaddw_s16,"a: int32x4_t, b: int16x4_t",int32x4_t,Signed add wide
+TRUE,vaddw_s32,"a: int64x2_t, b: int32x2_t",int64x2_t,Signed add wide
+TRUE,vaddw_s8,"a: int16x8_t, b: int8x8_t",int16x8_t,Signed add wide
+TRUE,vaddw_u16,"a: uint32x4_t, b: uint16x4_t",uint32x4_t,Unsigned add wide
+TRUE,vaddw_u32,"a: uint64x2_t, b: uint32x2_t",uint64x2_t,Unsigned add wide
+TRUE,vaddw_u8,"a: uint16x8_t, b: uint8x8_t",uint16x8_t,Unsigned add wide
+TRUE,vaesdq_u8,"data: uint8x16_t, key: uint8x16_t",uint8x16_t,AES single round decryption
+TRUE,vaeseq_u8,"data: uint8x16_t, key: uint8x16_t",uint8x16_t,AES single round encryption
+TRUE,vaesimcq_u8,data: uint8x16_t,uint8x16_t,AES inverse mix columns
+TRUE,vaesmcq_u8,data: uint8x16_t,uint8x16_t,AES mix columns
+TRUE,vand_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Bitwise AND
+TRUE,vand_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Bitwise AND
+TRUE,vand_s64,"a: int64x1_t, b: int64x1_t",int64x1_t,Bitwise AND
+TRUE,vand_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Bitwise AND
+TRUE,vand_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Bitwise AND
+TRUE,vand_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Bitwise AND
+TRUE,vand_u64,"a: uint64x1_t, b: uint64x1_t",uint64x1_t,Bitwise AND
+TRUE,vand_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Bitwise AND
+TRUE,vandq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Bitwise AND
+TRUE,vandq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Bitwise AND
+TRUE,vandq_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,Bitwise AND
+TRUE,vandq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Bitwise AND
+TRUE,vandq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Bitwise AND
+TRUE,vandq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Bitwise AND
+TRUE,vandq_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Bitwise AND
+TRUE,vandq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Bitwise AND
+FALSE,vbcaxq_s16,"a: int16x8_t, b: int16x8_t, c: int16x8_t",int16x8_t,Bit clear and exclusive OR
+FALSE,vbcaxq_s32,"a: int32x4_t, b: int32x4_t, c: int32x4_t",int32x4_t,Bit clear and exclusive OR
+FALSE,vbcaxq_s64,"a: int64x2_t, b: int64x2_t, c: int64x2_t",int64x2_t,Bit clear and exclusive OR
+FALSE,vbcaxq_s8,"a: int8x16_t, b: int8x16_t, c: int8x16_t",int8x16_t,Bit clear and exclusive OR
+FALSE,vbcaxq_u16,"a: uint16x8_t, b: uint16x8_t, c: uint16x8_t",uint16x8_t,Bit clear and exclusive OR
+FALSE,vbcaxq_u32,"a: uint32x4_t, b: uint32x4_t, c: uint32x4_t",uint32x4_t,Bit clear and exclusive OR
+FALSE,vbcaxq_u64,"a: uint64x2_t, b: uint64x2_t, c: uint64x2_t",uint64x2_t,Bit clear and exclusive OR
+FALSE,vbcaxq_u8,"a: uint8x16_t, b: uint8x16_t, c: uint8x16_t",uint8x16_t,Bit clear and exclusive OR
+FALSE,vbfdot_f32,"r: float32x2_t, a: bfloat16x4_t, b: bfloat16x4_t",float32x2_t,Bfloat16 floating-point dot product
+FALSE,vbfdot_lane_f32,"r: float32x2_t, a: bfloat16x4_t, b: bfloat16x4_t, lane: const int",float32x2_t,Bfloat16 floating-point dot product
+FALSE,vbfdot_laneq_f32,"r: float32x2_t, a: bfloat16x4_t, b: bfloat16x8_t, lane: const int",float32x2_t,Bfloat16 floating-point dot product
+FALSE,vbfdotq_f32,"r: float32x4_t, a: bfloat16x8_t, b: bfloat16x8_t",float32x4_t,Bfloat16 floating-point dot product
+FALSE,vbfdotq_lane_f32,"r: float32x4_t, a: bfloat16x8_t, b: bfloat16x4_t, lane: const int",float32x4_t,Bfloat16 floating-point dot product
+FALSE,vbfdotq_laneq_f32,"r: float32x4_t, a: bfloat16x8_t, b: bfloat16x8_t, lane: const int",float32x4_t,Bfloat16 floating-point dot product
+FALSE,vbfmlalbq_f32,"r: float32x4_t, a: bfloat16x8_t, b: bfloat16x8_t",float32x4_t,Bfloat16 floating-point widening multiply-add long
+FALSE,vbfmlalbq_lane_f32,"r: float32x4_t, a: bfloat16x8_t, b: bfloat16x4_t, lane: const int",float32x4_t,Bfloat16 floating-point widening multiply-add long
+FALSE,vbfmlalbq_laneq_f32,"r: float32x4_t, a: bfloat16x8_t, b: bfloat16x8_t, lane: const int",float32x4_t,Bfloat16 floating-point widening multiply-add long
+FALSE,vbfmlaltq_f32,"r: float32x4_t, a: bfloat16x8_t, b: bfloat16x8_t",float32x4_t,Bfloat16 floating-point widening multiply-add long
+FALSE,vbfmlaltq_lane_f32,"r: float32x4_t, a: bfloat16x8_t, b: bfloat16x4_t, lane: const int",float32x4_t,Bfloat16 floating-point widening multiply-add long
+FALSE,vbfmlaltq_laneq_f32,"r: float32x4_t, a: bfloat16x8_t, b: bfloat16x8_t, lane: const int",float32x4_t,Bfloat16 floating-point widening multiply-add long
+FALSE,vbfmmlaq_f32,"r: float32x4_t, a: bfloat16x8_t, b: bfloat16x8_t",float32x4_t,Bfloat16 floating-point matrix multiply-accumulate into 2x2 matrix
+TRUE,vbic_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,"Bitwise bit clear (vector, immediate)"
+TRUE,vbic_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,"Bitwise bit clear (vector, immediate)"
+TRUE,vbic_s64,"a: int64x1_t, b: int64x1_t",int64x1_t,"Bitwise bit clear (vector, immediate)"
+TRUE,vbic_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,"Bitwise bit clear (vector, immediate)"
+TRUE,vbic_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,"Bitwise bit clear (vector, immediate)"
+TRUE,vbic_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,"Bitwise bit clear (vector, immediate)"
+TRUE,vbic_u64,"a: uint64x1_t, b: uint64x1_t",uint64x1_t,"Bitwise bit clear (vector, immediate)"
+TRUE,vbic_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,"Bitwise bit clear (vector, immediate)"
+TRUE,vbicq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,"Bitwise bit clear (vector, immediate)"
+TRUE,vbicq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,"Bitwise bit clear (vector, immediate)"
+TRUE,vbicq_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,"Bitwise bit clear (vector, immediate)"
+TRUE,vbicq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,"Bitwise bit clear (vector, immediate)"
+TRUE,vbicq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,"Bitwise bit clear (vector, immediate)"
+TRUE,vbicq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,"Bitwise bit clear (vector, immediate)"
+TRUE,vbicq_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,"Bitwise bit clear (vector, immediate)"
+TRUE,vbicq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,"Bitwise bit clear (vector, immediate)"
+FALSE,vbsl_f16,"a: uint16x4_t, b: float16x4_t, c: float16x4_t",float16x4_t,Bitwise select
+TRUE,vbsl_f32,"a: uint32x2_t, b: float32x2_t, c: float32x2_t",float32x2_t,Bitwise select
+TRUE,vbsl_f64,"a: uint64x1_t, b: float64x1_t, c: float64x1_t",float64x1_t,Bitwise select
+TRUE,vbsl_p16,"a: uint16x4_t, b: poly16x4_t, c: poly16x4_t",poly16x4_t,Bitwise select
+TRUE,vbsl_p64,"a: poly64x1_t, b: poly64x1_t, c: poly64x1_t",poly64x1_t,Bitwise select
+TRUE,vbsl_p8,"a: uint8x8_t, b: poly8x8_t, c: poly8x8_t",poly8x8_t,Bitwise select
+TRUE,vbsl_s16,"a: uint16x4_t, b: int16x4_t, c: int16x4_t",int16x4_t,Bitwise select
+TRUE,vbsl_s32,"a: uint32x2_t, b: int32x2_t, c: int32x2_t",int32x2_t,Bitwise select
+TRUE,vbsl_s64,"a: uint64x1_t, b: int64x1_t, c: int64x1_t",int64x1_t,Bitwise select
+TRUE,vbsl_s8,"a: uint8x8_t, b: int8x8_t, c: int8x8_t",int8x8_t,Bitwise select
+TRUE,vbsl_u16,"a: uint16x4_t, b: uint16x4_t, c: uint16x4_t",uint16x4_t,Bitwise select
+TRUE,vbsl_u32,"a: uint32x2_t, b: uint32x2_t, c: uint32x2_t",uint32x2_t,Bitwise select
+TRUE,vbsl_u64,"a: uint64x1_t, b: uint64x1_t, c: uint64x1_t",uint64x1_t,Bitwise select
+TRUE,vbsl_u8,"a: uint8x8_t, b: uint8x8_t, c: uint8x8_t",uint8x8_t,Bitwise select
+FALSE,vbslq_f16,"a: uint16x8_t, b: float16x8_t, c: float16x8_t",float16x8_t,Bitwise select
+TRUE,vbslq_f32,"a: uint32x4_t, b: float32x4_t, c: float32x4_t",float32x4_t,Bitwise select
+TRUE,vbslq_f64,"a: uint64x2_t, b: float64x2_t, c: float64x2_t",float64x2_t,Bitwise select
+TRUE,vbslq_p16,"a: uint16x8_t, b: poly16x8_t, c: poly16x8_t",poly16x8_t,Bitwise select
+TRUE,vbslq_p64,"a: poly64x2_t, b: poly64x2_t, c: poly64x2_t",poly64x2_t,Bitwise select
+TRUE,vbslq_p8,"a: uint8x16_t, b: poly8x16_t, c: poly8x16_t",poly8x16_t,Bitwise select
+TRUE,vbslq_s16,"a: uint16x8_t, b: int16x8_t, c: int16x8_t",int16x8_t,Bitwise select
+TRUE,vbslq_s32,"a: uint32x4_t, b: int32x4_t, c: int32x4_t",int32x4_t,Bitwise select
+TRUE,vbslq_s64,"a: uint64x2_t, b: int64x2_t, c: int64x2_t",int64x2_t,Bitwise select
+TRUE,vbslq_s8,"a: uint8x16_t, b: int8x16_t, c: int8x16_t",int8x16_t,Bitwise select
+TRUE,vbslq_u16,"a: uint16x8_t, b: uint16x8_t, c: uint16x8_t",uint16x8_t,Bitwise select
+TRUE,vbslq_u32,"a: uint32x4_t, b: uint32x4_t, c: uint32x4_t",uint32x4_t,Bitwise select
+TRUE,vbslq_u64,"a: uint64x2_t, b: uint64x2_t, c: uint64x2_t",uint64x2_t,Bitwise select
+TRUE,vbslq_u8,"a: uint8x16_t, b: uint8x16_t, c: uint8x16_t",uint8x16_t,Bitwise select
+FALSE,vcadd_rot270_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point complex add
+FALSE,vcadd_rot270_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point complex add
+FALSE,vcadd_rot90_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point complex add
+FALSE,vcadd_rot90_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point complex add
+FALSE,vcaddq_rot270_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point complex add
+FALSE,vcaddq_rot270_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point complex add
+FALSE,vcaddq_rot270_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point complex add
+FALSE,vcaddq_rot90_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point complex add
+FALSE,vcaddq_rot90_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point complex add
+FALSE,vcaddq_rot90_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point complex add
+FALSE,vcage_f16,"a: float16x4_t, b: float16x4_t",uint16x4_t,Floating-point absolute compare greater than or equal
+TRUE,vcage_f32,"a: float32x2_t, b: float32x2_t",uint32x2_t,Floating-point absolute compare greater than or equal
+TRUE,vcage_f64,"a: float64x1_t, b: float64x1_t",uint64x1_t,Floating-point absolute compare greater than or equal
+FALSE,vcaged_f64,"a: float64_t, b: float64_t",u64,Floating-point absolute compare greater than or equal
+FALSE,vcageh_f16,"a: float16_t, b: float16_t",u16,Floating-point absolute compare greater than or equal
+FALSE,vcageq_f16,"a: float16x8_t, b: float16x8_t",uint16x8_t,Floating-point absolute compare greater than or equal
+TRUE,vcageq_f32,"a: float32x4_t, b: float32x4_t",uint32x4_t,Floating-point absolute compare greater than or equal
+TRUE,vcageq_f64,"a: float64x2_t, b: float64x2_t",uint64x2_t,Floating-point absolute compare greater than or equal
+FALSE,vcages_f32,"a: f32, b: f32",u32,Floating-point absolute compare greater than or equal
+FALSE,vcagt_f16,"a: float16x4_t, b: float16x4_t",uint16x4_t,Floating-point absolute compare greater than
+TRUE,vcagt_f32,"a: float32x2_t, b: float32x2_t",uint32x2_t,Floating-point absolute compare greater than
+TRUE,vcagt_f64,"a: float64x1_t, b: float64x1_t",uint64x1_t,Floating-point absolute compare greater than
+FALSE,vcagtd_f64,"a: float64_t, b: float64_t",u64,Floating-point absolute compare greater than
+FALSE,vcagth_f16,"a: float16_t, b: float16_t",u16,Floating-point absolute compare greater than
+FALSE,vcagtq_f16,"a: float16x8_t, b: float16x8_t",uint16x8_t,Floating-point absolute compare greater than
+TRUE,vcagtq_f32,"a: float32x4_t, b: float32x4_t",uint32x4_t,Floating-point absolute compare greater than
+TRUE,vcagtq_f64,"a: float64x2_t, b: float64x2_t",uint64x2_t,Floating-point absolute compare greater than
+FALSE,vcagts_f32,"a: f32, b: f32",u32,Floating-point absolute compare greater than
+FALSE,vcale_f16,"a: float16x4_t, b: float16x4_t",uint16x4_t,Floating-point absolute compare less than or equal
+TRUE,vcale_f32,"a: float32x2_t, b: float32x2_t",uint32x2_t,Floating-point absolute compare less than or equal
+TRUE,vcale_f64,"a: float64x1_t, b: float64x1_t",uint64x1_t,Floating-point absolute compare less than or equal
+FALSE,vcaled_f64,"a: float64_t, b: float64_t",u64,Floating-point absolute compare less than or equal
+FALSE,vcaleh_f16,"a: float16_t, b: float16_t",u16,Floating-point absolute compare less than or equal
+FALSE,vcaleq_f16,"a: float16x8_t, b: float16x8_t",uint16x8_t,Floating-point absolute compare less than or equal
+TRUE,vcaleq_f32,"a: float32x4_t, b: float32x4_t",uint32x4_t,Floating-point absolute compare less than or equal
+TRUE,vcaleq_f64,"a: float64x2_t, b: float64x2_t",uint64x2_t,Floating-point absolute compare less than or equal
+FALSE,vcales_f32,"a: f32, b: f32",u32,Floating-point absolute compare less than or equal
+FALSE,vcalt_f16,"a: float16x4_t, b: float16x4_t",uint16x4_t,Floating-point absolute compare less than
+TRUE,vcalt_f32,"a: float32x2_t, b: float32x2_t",uint32x2_t,Floating-point absolute compare less than
+TRUE,vcalt_f64,"a: float64x1_t, b: float64x1_t",uint64x1_t,Floating-point absolute compare less than
+FALSE,vcaltd_f64,"a: float64_t, b: float64_t",u64,Floating-point absolute compare less than
+FALSE,vcalth_f16,"a: float16_t, b: float16_t",u16,Floating-point absolute compare less than
+FALSE,vcaltq_f16,"a: float16x8_t, b: float16x8_t",uint16x8_t,Floating-point absolute compare less than
+TRUE,vcaltq_f32,"a: float32x4_t, b: float32x4_t",uint32x4_t,Floating-point absolute compare less than
+TRUE,vcaltq_f64,"a: float64x2_t, b: float64x2_t",uint64x2_t,Floating-point absolute compare less than
+FALSE,vcalts_f32,"a: f32, b: f32",u32,Floating-point absolute compare less than
+FALSE,vceq_f16,"a: float16x4_t, b: float16x4_t",uint16x4_t,Floating-point compare equal
+TRUE,vceq_f32,"a: float32x2_t, b: float32x2_t",uint32x2_t,Floating-point compare equal
+TRUE,vceq_f64,"a: float64x1_t, b: float64x1_t",uint64x1_t,Floating-point compare equal
+TRUE,vceq_p64,"a: poly64x1_t, b: poly64x1_t",uint64x1_t,Compare bitwise equal
+FALSE,vceq_p8,"a: poly8x8_t, b: poly8x8_t",uint8x8_t,Compare bitwise equal
+TRUE,vceq_s16,"a: int16x4_t, b: int16x4_t",uint16x4_t,Compare bitwise equal
+TRUE,vceq_s32,"a: int32x2_t, b: int32x2_t",uint32x2_t,Compare bitwise equal
+TRUE,vceq_s64,"a: int64x1_t, b: int64x1_t",uint64x1_t,Compare bitwise equal
+TRUE,vceq_s8,"a: int8x8_t, b: int8x8_t",uint8x8_t,Compare bitwise equal
+TRUE,vceq_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Compare bitwise equal
+TRUE,vceq_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Compare bitwise equal
+TRUE,vceq_u64,"a: uint64x1_t, b: uint64x1_t",uint64x1_t,Compare bitwise equal
+TRUE,vceq_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Compare bitwise equal
+FALSE,vceqd_f64,"a: float64_t, b: float64_t",u64,Floating-point compare equal
+FALSE,vceqd_s64,"a: i64, b: i64",u64,Compare bitwise equal
+FALSE,vceqd_u64,"a: u64, b: u64",u64,Compare bitwise equal
+FALSE,vceqh_f16,"a: float16_t, b: float16_t",u16,Floating-point compare equal
+FALSE,vceqq_f16,"a: float16x8_t, b: float16x8_t",uint16x8_t,Floating-point compare equal
+TRUE,vceqq_f32,"a: float32x4_t, b: float32x4_t",uint32x4_t,Floating-point compare equal
+TRUE,vceqq_f64,"a: float64x2_t, b: float64x2_t",uint64x2_t,Floating-point compare equal
+TRUE,vceqq_p64,"a: poly64x2_t, b: poly64x2_t",uint64x2_t,Compare bitwise equal
+TRUE,vceqq_p8,"a: poly8x16_t, b: poly8x16_t",uint8x16_t,Compare bitwise equal
+TRUE,vceqq_s16,"a: int16x8_t, b: int16x8_t",uint16x8_t,Compare bitwise equal
+TRUE,vceqq_s32,"a: int32x4_t, b: int32x4_t",uint32x4_t,Compare bitwise equal
+TRUE,vceqq_s64,"a: int64x2_t, b: int64x2_t",uint64x2_t,Compare bitwise equal
+TRUE,vceqq_s8,"a: int8x16_t, b: int8x16_t",uint8x16_t,Compare bitwise equal
+TRUE,vceqq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Compare bitwise equal
+TRUE,vceqq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Compare bitwise equal
+TRUE,vceqq_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Compare bitwise equal
+TRUE,vceqq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Compare bitwise equal
+FALSE,vceqs_f32,"a: f32, b: f32",u32,Floating-point compare equal
+FALSE,vceqz_f16,a: float16x4_t,uint16x4_t,Floating-point compare equal to zero
+TRUE,vceqz_f32,a: float32x2_t,uint32x2_t,Floating-point compare equal to zero
+TRUE,vceqz_f64,a: float64x1_t,uint64x1_t,Floating-point compare equal to zero
+TRUE,vceqz_p64,a: poly64x1_t,uint64x1_t,Compare bitwise equal to zero
+TRUE,vceqz_p8,a: poly8x8_t,uint8x8_t,Compare bitwise equal to zero
+TRUE,vceqz_s16,a: int16x4_t,uint16x4_t,Compare bitwise equal to zero
+TRUE,vceqz_s32,a: int32x2_t,uint32x2_t,Compare bitwise equal to zero
+TRUE,vceqz_s64,a: int64x1_t,uint64x1_t,Compare bitwise equal to zero
+TRUE,vceqz_s8,a: int8x8_t,uint8x8_t,Compare bitwise equal to zero
+TRUE,vceqz_u16,a: uint16x4_t,uint16x4_t,Compare bitwise equal to zero
+TRUE,vceqz_u32,a: uint32x2_t,uint32x2_t,Compare bitwise equal to zero
+TRUE,vceqz_u64,a: uint64x1_t,uint64x1_t,Compare bitwise equal to zero
+TRUE,vceqz_u8,a: uint8x8_t,uint8x8_t,Compare bitwise equal to zero
+FALSE,vceqzd_f64,a: float64_t,u64,Floating-point compare equal to zero
+FALSE,vceqzd_s64,a: i64,u64,Compare bitwise equal to zero
+FALSE,vceqzd_u64,a: u64,u64,Compare bitwise equal to zero
+FALSE,vceqzh_f16,a: float16_t,u16,Floating-point compare equal to zero
+FALSE,vceqzq_f16,a: float16x8_t,uint16x8_t,Floating-point compare equal to zero
+TRUE,vceqzq_f32,a: float32x4_t,uint32x4_t,Floating-point compare equal to zero
+TRUE,vceqzq_f64,a: float64x2_t,uint64x2_t,Floating-point compare equal to zero
+TRUE,vceqzq_p64,a: poly64x2_t,uint64x2_t,Compare bitwise equal to zero
+TRUE,vceqzq_p8,a: poly8x16_t,uint8x16_t,Compare bitwise equal to zero
+TRUE,vceqzq_s16,a: int16x8_t,uint16x8_t,Compare bitwise equal to zero
+TRUE,vceqzq_s32,a: int32x4_t,uint32x4_t,Compare bitwise equal to zero
+TRUE,vceqzq_s64,a: int64x2_t,uint64x2_t,Compare bitwise equal to zero
+TRUE,vceqzq_s8,a: int8x16_t,uint8x16_t,Compare bitwise equal to zero
+TRUE,vceqzq_u16,a: uint16x8_t,uint16x8_t,Compare bitwise equal to zero
+TRUE,vceqzq_u32,a: uint32x4_t,uint32x4_t,Compare bitwise equal to zero
+TRUE,vceqzq_u64,a: uint64x2_t,uint64x2_t,Compare bitwise equal to zero
+TRUE,vceqzq_u8,a: uint8x16_t,uint8x16_t,Compare bitwise equal to zero
+FALSE,vceqzs_f32,a: f32,u32,Floating-point compare equal to zero
+FALSE,vcge_f16,"a: float16x4_t, b: float16x4_t",uint16x4_t,Floating-point compare greater than or equal
+TRUE,vcge_f32,"a: float32x2_t, b: float32x2_t",uint32x2_t,Floating-point compare greater than or equal
+TRUE,vcge_f64,"a: float64x1_t, b: float64x1_t",uint64x1_t,Floating-point compare greater than or equal
+TRUE,vcge_s16,"a: int16x4_t, b: int16x4_t",uint16x4_t,Compare signed greater than or equal
+TRUE,vcge_s32,"a: int32x2_t, b: int32x2_t",uint32x2_t,Compare signed greater than or equal
+TRUE,vcge_s64,"a: int64x1_t, b: int64x1_t",uint64x1_t,Compare signed greater than or equal
+TRUE,vcge_s8,"a: int8x8_t, b: int8x8_t",uint8x8_t,Compare signed greater than or equal
+TRUE,vcge_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Compare unsigned higher or same
+TRUE,vcge_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Compare unsigned higher or same
+TRUE,vcge_u64,"a: uint64x1_t, b: uint64x1_t",uint64x1_t,Compare unsigned higher or same
+TRUE,vcge_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Compare unsigned higher or same
+FALSE,vcged_f64,"a: float64_t, b: float64_t",u64,Floating-point compare greater than or equal
+FALSE,vcged_s64,"a: i64, b: i64",u64,Compare signed greater than or equal
+FALSE,vcged_u64,"a: u64, b: u64",u64,Compare unsigned higher or same
+FALSE,vcgeh_f16,"a: float16_t, b: float16_t",u16,Floating-point compare greater than or equal
+FALSE,vcgeq_f16,"a: float16x8_t, b: float16x8_t",uint16x8_t,Floating-point compare greater than or equal
+TRUE,vcgeq_f32,"a: float32x4_t, b: float32x4_t",uint32x4_t,Floating-point compare greater than or equal
+TRUE,vcgeq_f64,"a: float64x2_t, b: float64x2_t",uint64x2_t,Floating-point compare greater than or equal
+TRUE,vcgeq_s16,"a: int16x8_t, b: int16x8_t",uint16x8_t,Compare signed greater than or equal
+TRUE,vcgeq_s32,"a: int32x4_t, b: int32x4_t",uint32x4_t,Compare signed greater than or equal
+TRUE,vcgeq_s64,"a: int64x2_t, b: int64x2_t",uint64x2_t,Compare signed greater than or equal
+TRUE,vcgeq_s8,"a: int8x16_t, b: int8x16_t",uint8x16_t,Compare signed greater than or equal
+TRUE,vcgeq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Compare unsigned higher or same
+TRUE,vcgeq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Compare unsigned higher or same
+TRUE,vcgeq_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Compare unsigned higher or same
+TRUE,vcgeq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Compare unsigned higher or same
+FALSE,vcges_f32,"a: f32, b: f32",u32,Floating-point compare greater than or equal
+FALSE,vcgez_f16,a: float16x4_t,uint16x4_t,Floating-point compare greater than or equal to zero
+TRUE,vcgez_f32,a: float32x2_t,uint32x2_t,Floating-point compare greater than or equal to zero
+TRUE,vcgez_f64,a: float64x1_t,uint64x1_t,Floating-point compare greater than or equal to zero
+TRUE,vcgez_s16,a: int16x4_t,uint16x4_t,Compare signed greater than or equal to zero
+TRUE,vcgez_s32,a: int32x2_t,uint32x2_t,Compare signed greater than or equal to zero
+TRUE,vcgez_s64,a: int64x1_t,uint64x1_t,Compare signed greater than or equal to zero
+TRUE,vcgez_s8,a: int8x8_t,uint8x8_t,Compare signed greater than or equal to zero
+FALSE,vcgezd_f64,a: float64_t,u64,Floating-point compare greater than or equal to zero
+FALSE,vcgezd_s64,a: i64,u64,Compare signed greater than or equal to zero
+FALSE,vcgezh_f16,a: float16_t,u16,Floating-point compare greater than or equal to zero
+FALSE,vcgezq_f16,a: float16x8_t,uint16x8_t,Floating-point compare greater than or equal to zero
+TRUE,vcgezq_f32,a: float32x4_t,uint32x4_t,Floating-point compare greater than or equal to zero
+TRUE,vcgezq_f64,a: float64x2_t,uint64x2_t,Floating-point compare greater than or equal to zero
+TRUE,vcgezq_s16,a: int16x8_t,uint16x8_t,Compare signed greater than or equal to zero
+TRUE,vcgezq_s32,a: int32x4_t,uint32x4_t,Compare signed greater than or equal to zero
+TRUE,vcgezq_s64,a: int64x2_t,uint64x2_t,Compare signed greater than or equal to zero
+TRUE,vcgezq_s8,a: int8x16_t,uint8x16_t,Compare signed greater than or equal to zero
+FALSE,vcgezs_f32,a: f32,u32,Floating-point compare greater than or equal to zero
+FALSE,vcgt_f16,"a: float16x4_t, b: float16x4_t",uint16x4_t,Floating-point compare greater than
+TRUE,vcgt_f32,"a: float32x2_t, b: float32x2_t",uint32x2_t,Floating-point compare greater than
+TRUE,vcgt_f64,"a: float64x1_t, b: float64x1_t",uint64x1_t,Floating-point compare greater than
+TRUE,vcgt_s16,"a: int16x4_t, b: int16x4_t",uint16x4_t,Compare signed greater than
+TRUE,vcgt_s32,"a: int32x2_t, b: int32x2_t",uint32x2_t,Compare signed greater than
+TRUE,vcgt_s64,"a: int64x1_t, b: int64x1_t",uint64x1_t,Compare signed greater than
+TRUE,vcgt_s8,"a: int8x8_t, b: int8x8_t",uint8x8_t,Compare signed greater than
+TRUE,vcgt_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Compare unsigned higher
+TRUE,vcgt_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Compare unsigned higher
+TRUE,vcgt_u64,"a: uint64x1_t, b: uint64x1_t",uint64x1_t,Compare unsigned higher
+TRUE,vcgt_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Compare unsigned higher
+FALSE,vcgtd_f64,"a: float64_t, b: float64_t",u64,Floating-point compare greater than
+FALSE,vcgtd_s64,"a: i64, b: i64",u64,Compare signed greater than
+FALSE,vcgtd_u64,"a: u64, b: u64",u64,Compare unsigned higher
+FALSE,vcgth_f16,"a: float16_t, b: float16_t",u16,Floating-point compare greater than
+FALSE,vcgtq_f16,"a: float16x8_t, b: float16x8_t",uint16x8_t,Floating-point compare greater than
+TRUE,vcgtq_f32,"a: float32x4_t, b: float32x4_t",uint32x4_t,Floating-point compare greater than
+TRUE,vcgtq_f64,"a: float64x2_t, b: float64x2_t",uint64x2_t,Floating-point compare greater than
+TRUE,vcgtq_s16,"a: int16x8_t, b: int16x8_t",uint16x8_t,Compare signed greater than
+TRUE,vcgtq_s32,"a: int32x4_t, b: int32x4_t",uint32x4_t,Compare signed greater than
+TRUE,vcgtq_s64,"a: int64x2_t, b: int64x2_t",uint64x2_t,Compare signed greater than
+TRUE,vcgtq_s8,"a: int8x16_t, b: int8x16_t",uint8x16_t,Compare signed greater than
+TRUE,vcgtq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Compare unsigned higher
+TRUE,vcgtq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Compare unsigned higher
+TRUE,vcgtq_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Compare unsigned higher
+TRUE,vcgtq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Compare unsigned higher
+FALSE,vcgts_f32,"a: f32, b: f32",u32,Floating-point compare greater than
+FALSE,vcgtz_f16,a: float16x4_t,uint16x4_t,Floating-point compare greater than zero
+TRUE,vcgtz_f32,a: float32x2_t,uint32x2_t,Floating-point compare greater than zero
+TRUE,vcgtz_f64,a: float64x1_t,uint64x1_t,Floating-point compare greater than zero
+TRUE,vcgtz_s16,a: int16x4_t,uint16x4_t,Compare signed greater than zero
+TRUE,vcgtz_s32,a: int32x2_t,uint32x2_t,Compare signed greater than zero
+TRUE,vcgtz_s64,a: int64x1_t,uint64x1_t,Compare signed greater than zero
+TRUE,vcgtz_s8,a: int8x8_t,uint8x8_t,Compare signed greater than zero
+FALSE,vcgtzd_f64,a: float64_t,u64,Floating-point compare greater than zero
+FALSE,vcgtzd_s64,a: i64,u64,Compare signed greater than zero
+FALSE,vcgtzh_f16,a: float16_t,u16,Floating-point compare greater than zero
+FALSE,vcgtzq_f16,a: float16x8_t,uint16x8_t,Floating-point compare greater than zero
+TRUE,vcgtzq_f32,a: float32x4_t,uint32x4_t,Floating-point compare greater than zero
+TRUE,vcgtzq_f64,a: float64x2_t,uint64x2_t,Floating-point compare greater than zero
+TRUE,vcgtzq_s16,a: int16x8_t,uint16x8_t,Compare signed greater than zero
+TRUE,vcgtzq_s32,a: int32x4_t,uint32x4_t,Compare signed greater than zero
+TRUE,vcgtzq_s64,a: int64x2_t,uint64x2_t,Compare signed greater than zero
+TRUE,vcgtzq_s8,a: int8x16_t,uint8x16_t,Compare signed greater than zero
+FALSE,vcgtzs_f32,a: f32,u32,Floating-point compare greater than zero
+FALSE,vcle_f16,"a: float16x4_t, b: float16x4_t",uint16x4_t,Floating-point compare less than or equal
+TRUE,vcle_f32,"a: float32x2_t, b: float32x2_t",uint32x2_t,Floating-point compare less than or equal
+TRUE,vcle_f64,"a: float64x1_t, b: float64x1_t",uint64x1_t,Floating-point compare less than or equal
+TRUE,vcle_s16,"a: int16x4_t, b: int16x4_t",uint16x4_t,Compare signed less than or equal
+TRUE,vcle_s32,"a: int32x2_t, b: int32x2_t",uint32x2_t,Compare signed less than or equal
+TRUE,vcle_s64,"a: int64x1_t, b: int64x1_t",uint64x1_t,Compare signed less than or equal
+TRUE,vcle_s8,"a: int8x8_t, b: int8x8_t",uint8x8_t,Compare signed less than or equal
+TRUE,vcle_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Compare unsigned less than or equal
+TRUE,vcle_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Compare unsigned less than or equal
+TRUE,vcle_u64,"a: uint64x1_t, b: uint64x1_t",uint64x1_t,Compare unsigned less than or equal
+TRUE,vcle_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Compare unsigned less than or equal
+FALSE,vcled_f64,"a: float64_t, b: float64_t",u64,Floating-point compare less than or equal
+FALSE,vcled_s64,"a: i64, b: i64",u64,Compare signed less than or equal
+FALSE,vcled_u64,"a: u64, b: u64",u64,Compare unsigned less than or equal
+FALSE,vcleh_f16,"a: float16_t, b: float16_t",u16,Floating-point compare less than or equal
+FALSE,vcleq_f16,"a: float16x8_t, b: float16x8_t",uint16x8_t,Floating-point compare less than or equal
+TRUE,vcleq_f32,"a: float32x4_t, b: float32x4_t",uint32x4_t,Floating-point compare less than or equal
+TRUE,vcleq_f64,"a: float64x2_t, b: float64x2_t",uint64x2_t,Floating-point compare less than or equal
+TRUE,vcleq_s16,"a: int16x8_t, b: int16x8_t",uint16x8_t,Compare signed less than or equal
+TRUE,vcleq_s32,"a: int32x4_t, b: int32x4_t",uint32x4_t,Compare signed less than or equal
+TRUE,vcleq_s64,"a: int64x2_t, b: int64x2_t",uint64x2_t,Compare signed less than or equal
+TRUE,vcleq_s8,"a: int8x16_t, b: int8x16_t",uint8x16_t,Compare signed less than or equal
+TRUE,vcleq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Compare unsigned less than or equal
+TRUE,vcleq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Compare unsigned less than or equal
+TRUE,vcleq_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Compare unsigned less than or equal
+TRUE,vcleq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Compare unsigned less than or equal
+FALSE,vcles_f32,"a: f32, b: f32",u32,Floating-point compare less than or equal
+FALSE,vclez_f16,a: float16x4_t,uint16x4_t,Floating-point compare less than or equal to zero
+TRUE,vclez_f32,a: float32x2_t,uint32x2_t,Floating-point compare less than or equal to zero
+TRUE,vclez_f64,a: float64x1_t,uint64x1_t,Floating-point compare less than or equal to zero
+TRUE,vclez_s16,a: int16x4_t,uint16x4_t,Compare signed less than or equal to zero
+TRUE,vclez_s32,a: int32x2_t,uint32x2_t,Compare signed less than or equal to zero
+TRUE,vclez_s64,a: int64x1_t,uint64x1_t,Compare signed less than or equal to zero
+TRUE,vclez_s8,a: int8x8_t,uint8x8_t,Compare signed less than or equal to zero
+FALSE,vclezd_f64,a: float64_t,u64,Floating-point compare less than or equal to zero
+FALSE,vclezd_s64,a: i64,u64,Compare signed less than or equal to zero
+FALSE,vclezh_f16,a: float16_t,u16,Floating-point compare less than or equal to zero
+FALSE,vclezq_f16,a: float16x8_t,uint16x8_t,Floating-point compare less than or equal to zero
+TRUE,vclezq_f32,a: float32x4_t,uint32x4_t,Floating-point compare less than or equal to zero
+TRUE,vclezq_f64,a: float64x2_t,uint64x2_t,Floating-point compare less than or equal to zero
+TRUE,vclezq_s16,a: int16x8_t,uint16x8_t,Compare signed less than or equal to zero
+TRUE,vclezq_s32,a: int32x4_t,uint32x4_t,Compare signed less than or equal to zero
+TRUE,vclezq_s64,a: int64x2_t,uint64x2_t,Compare signed less than or equal to zero
+TRUE,vclezq_s8,a: int8x16_t,uint8x16_t,Compare signed less than or equal to zero
+FALSE,vclezs_f32,a: f32,u32,Floating-point compare less than or equal to zero
+TRUE,vcls_s16,a: int16x4_t,int16x4_t,Count leading sign bits
+TRUE,vcls_s32,a: int32x2_t,int32x2_t,Count leading sign bits
+TRUE,vcls_s8,a: int8x8_t,int8x8_t,Count leading sign bits
+TRUE,vclsq_s16,a: int16x8_t,int16x8_t,Count leading sign bits
+TRUE,vclsq_s32,a: int32x4_t,int32x4_t,Count leading sign bits
+TRUE,vclsq_s8,a: int8x16_t,int8x16_t,Count leading sign bits
+FALSE,vclt_f16,"a: float16x4_t, b: float16x4_t",uint16x4_t,Floating-point compare less than
+TRUE,vclt_f32,"a: float32x2_t, b: float32x2_t",uint32x2_t,Floating-point compare less than
+TRUE,vclt_f64,"a: float64x1_t, b: float64x1_t",uint64x1_t,Floating-point compare less than
+TRUE,vclt_s16,"a: int16x4_t, b: int16x4_t",uint16x4_t,Compare signed less than
+TRUE,vclt_s32,"a: int32x2_t, b: int32x2_t",uint32x2_t,Compare signed less than
+TRUE,vclt_s64,"a: int64x1_t, b: int64x1_t",uint64x1_t,Compare signed less than
+TRUE,vclt_s8,"a: int8x8_t, b: int8x8_t",uint8x8_t,Compare signed less than
+TRUE,vclt_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Compare unsigned less than
+TRUE,vclt_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Compare unsigned less than
+TRUE,vclt_u64,"a: uint64x1_t, b: uint64x1_t",uint64x1_t,Compare unsigned less than
+TRUE,vclt_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Compare unsigned less than
+FALSE,vcltd_f64,"a: float64_t, b: float64_t",u64,Floating-point compare less than
+FALSE,vcltd_s64,"a: i64, b: i64",u64,Compare signed less than
+FALSE,vcltd_u64,"a: u64, b: u64",u64,Compare unsigned less than
+FALSE,vclth_f16,"a: float16_t, b: float16_t",u16,Floating-point compare less than
+FALSE,vcltq_f16,"a: float16x8_t, b: float16x8_t",uint16x8_t,Floating-point compare less than
+TRUE,vcltq_f32,"a: float32x4_t, b: float32x4_t",uint32x4_t,Floating-point compare less than
+TRUE,vcltq_f64,"a: float64x2_t, b: float64x2_t",uint64x2_t,Floating-point compare less than
+TRUE,vcltq_s16,"a: int16x8_t, b: int16x8_t",uint16x8_t,Compare signed less than
+TRUE,vcltq_s32,"a: int32x4_t, b: int32x4_t",uint32x4_t,Compare signed less than
+TRUE,vcltq_s64,"a: int64x2_t, b: int64x2_t",uint64x2_t,Compare signed less than
+TRUE,vcltq_s8,"a: int8x16_t, b: int8x16_t",uint8x16_t,Compare signed less than
+TRUE,vcltq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Compare unsigned less than
+TRUE,vcltq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Compare unsigned less than
+TRUE,vcltq_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Compare unsigned less than
+TRUE,vcltq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Compare unsigned less than
+FALSE,vclts_f32,"a: f32, b: f32",u32,Floating-point compare less than
+FALSE,vcltz_f16,a: float16x4_t,uint16x4_t,Floating-point compare less than zero
+TRUE,vcltz_f32,a: float32x2_t,uint32x2_t,Floating-point compare less than zero
+TRUE,vcltz_f64,a: float64x1_t,uint64x1_t,Floating-point compare less than zero
+TRUE,vcltz_s16,a: int16x4_t,uint16x4_t,Compare signed less than zero
+TRUE,vcltz_s32,a: int32x2_t,uint32x2_t,Compare signed less than zero
+TRUE,vcltz_s64,a: int64x1_t,uint64x1_t,Compare signed less than zero
+TRUE,vcltz_s8,a: int8x8_t,uint8x8_t,Compare signed less than zero
+FALSE,vcltzd_f64,a: float64_t,u64,Floating-point compare less than zero
+FALSE,vcltzd_s64,a: i64,u64,Compare signed less than zero
+FALSE,vcltzh_f16,a: float16_t,u16,Floating-point compare less than zero
+FALSE,vcltzq_f16,a: float16x8_t,uint16x8_t,Floating-point compare less than zero
+TRUE,vcltzq_f32,a: float32x4_t,uint32x4_t,Floating-point compare less than zero
+TRUE,vcltzq_f64,a: float64x2_t,uint64x2_t,Floating-point compare less than zero
+TRUE,vcltzq_s16,a: int16x8_t,uint16x8_t,Compare signed less than zero
+TRUE,vcltzq_s32,a: int32x4_t,uint32x4_t,Compare signed less than zero
+TRUE,vcltzq_s64,a: int64x2_t,uint64x2_t,Compare signed less than zero
+TRUE,vcltzq_s8,a: int8x16_t,uint8x16_t,Compare signed less than zero
+FALSE,vcltzs_f32,a: f32,u32,Floating-point compare less than zero
+TRUE,vclz_s16,a: int16x4_t,int16x4_t,Count leading zero bits
+TRUE,vclz_s32,a: int32x2_t,int32x2_t,Count leading zero bits
+TRUE,vclz_s8,a: int8x8_t,int8x8_t,Count leading zero bits
+TRUE,vclz_u16,a: uint16x4_t,uint16x4_t,Count leading zero bits
+TRUE,vclz_u32,a: uint32x2_t,uint32x2_t,Count leading zero bits
+TRUE,vclz_u8,a: uint8x8_t,uint8x8_t,Count leading zero bits
+TRUE,vclzq_s16,a: int16x8_t,int16x8_t,Count leading zero bits
+TRUE,vclzq_s32,a: int32x4_t,int32x4_t,Count leading zero bits
+TRUE,vclzq_s8,a: int8x16_t,int8x16_t,Count leading zero bits
+TRUE,vclzq_u16,a: uint16x8_t,uint16x8_t,Count leading zero bits
+TRUE,vclzq_u32,a: uint32x4_t,uint32x4_t,Count leading zero bits
+TRUE,vclzq_u8,a: uint8x16_t,uint8x16_t,Count leading zero bits
+FALSE,vcmla_f16,"r: float16x4_t, a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point complex multiply accumulate
+FALSE,vcmla_f32,"r: float32x2_t, a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point complex multiply accumulate
+FALSE,vcmla_lane_f16,"r: float16x4_t, a: float16x4_t, b: float16x4_t, lane: const int",float16x4_t,Floating-point complex multiply accumulate
+FALSE,vcmla_lane_f32,"r: float32x2_t, a: float32x2_t, b: float32x2_t, lane: const int",float32x2_t,Floating-point complex multiply accumulate
+FALSE,vcmla_laneq_f16,"r: float16x4_t, a: float16x4_t, b: float16x8_t, lane: const int",float16x4_t,Floating-point complex multiply accumulate
+FALSE,vcmla_laneq_f16,"r: float16x4_t, a: float16x4_t, b: float16x8_t, lane: const int",float16x4_t,Floating-point complex multiply accumulate
+FALSE,vcmla_laneq_f32,"r: float32x2_t, a: float32x2_t, b: float32x4_t, lane: const int",float32x2_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot180_f16,"r: float16x4_t, a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot180_f32,"r: float32x2_t, a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot180_lane_f16,"r: float16x4_t, a: float16x4_t, b: float16x4_t, lane: const int",float16x4_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot180_lane_f32,"r: float32x2_t, a: float32x2_t, b: float32x2_t, lane: const int",float32x2_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot180_laneq_f16,"r: float16x4_t, a: float16x4_t, b: float16x8_t, lane: const int",float16x4_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot180_laneq_f16,"r: float16x4_t, a: float16x4_t, b: float16x8_t, lane: const int",float16x4_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot180_laneq_f32,"r: float32x2_t, a: float32x2_t, b: float32x4_t, lane: const int",float32x2_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot270_f16,"r: float16x4_t, a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot270_f32,"r: float32x2_t, a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot270_lane_f16,"r: float16x4_t, a: float16x4_t, b: float16x4_t, lane: const int",float16x4_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot270_lane_f32,"r: float32x2_t, a: float32x2_t, b: float32x2_t, lane: const int",float32x2_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot270_laneq_f16,"r: float16x4_t, a: float16x4_t, b: float16x8_t, lane: const int",float16x4_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot270_laneq_f16,"r: float16x4_t, a: float16x4_t, b: float16x8_t, lane: const int",float16x4_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot270_laneq_f32,"r: float32x2_t, a: float32x2_t, b: float32x4_t, lane: const int",float32x2_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot90_f16,"r: float16x4_t, a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot90_f32,"r: float32x2_t, a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot90_lane_f16,"r: float16x4_t, a: float16x4_t, b: float16x4_t, lane: const int",float16x4_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot90_lane_f32,"r: float32x2_t, a: float32x2_t, b: float32x2_t, lane: const int",float32x2_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot90_laneq_f16,"r: float16x4_t, a: float16x4_t, b: float16x8_t, lane: const int",float16x4_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot90_laneq_f16,"r: float16x4_t, a: float16x4_t, b: float16x8_t, lane: const int",float16x4_t,Floating-point complex multiply accumulate
+FALSE,vcmla_rot90_laneq_f32,"r: float32x2_t, a: float32x2_t, b: float32x4_t, lane: const int",float32x2_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_f16,"r: float16x8_t, a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_f32,"r: float32x4_t, a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_f64,"r: float64x2_t, a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_lane_f16,"r: float16x8_t, a: float16x8_t, b: float16x4_t, lane: const int",float16x8_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_lane_f32,"r: float32x4_t, a: float32x4_t, b: float32x2_t, lane: const int",float32x4_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_laneq_f16,"r: float16x8_t, a: float16x8_t, b: float16x8_t, lane: const int",float16x8_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_laneq_f32,"r: float32x4_t, a: float32x4_t, b: float32x4_t, lane: const int",float32x4_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot180_f16,"r: float16x8_t, a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot180_f32,"r: float32x4_t, a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot180_f64,"r: float64x2_t, a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot180_lane_f16,"r: float16x8_t, a: float16x8_t, b: float16x4_t, lane: const int",float16x8_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot180_lane_f32,"r: float32x4_t, a: float32x4_t, b: float32x2_t, lane: const int",float32x4_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot180_laneq_f16,"r: float16x8_t, a: float16x8_t, b: float16x8_t, lane: const int",float16x8_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot180_laneq_f32,"r: float32x4_t, a: float32x4_t, b: float32x4_t, lane: const int",float32x4_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot270_f16,"r: float16x8_t, a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot270_f32,"r: float32x4_t, a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot270_f64,"r: float64x2_t, a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot270_lane_f16,"r: float16x8_t, a: float16x8_t, b: float16x4_t, lane: const int",float16x8_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot270_lane_f32,"r: float32x4_t, a: float32x4_t, b: float32x2_t, lane: const int",float32x4_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot270_laneq_f16,"r: float16x8_t, a: float16x8_t, b: float16x8_t, lane: const int",float16x8_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot270_laneq_f32,"r: float32x4_t, a: float32x4_t, b: float32x4_t, lane: const int",float32x4_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot90_f16,"r: float16x8_t, a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot90_f32,"r: float32x4_t, a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot90_f64,"r: float64x2_t, a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot90_lane_f16,"r: float16x8_t, a: float16x8_t, b: float16x4_t, lane: const int",float16x8_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot90_lane_f32,"r: float32x4_t, a: float32x4_t, b: float32x2_t, lane: const int",float32x4_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot90_laneq_f16,"r: float16x8_t, a: float16x8_t, b: float16x8_t, lane: const int",float16x8_t,Floating-point complex multiply accumulate
+FALSE,vcmlaq_rot90_laneq_f32,"r: float32x4_t, a: float32x4_t, b: float32x4_t, lane: const int",float32x4_t,Floating-point complex multiply accumulate
+TRUE,vcnt_p8,a: poly8x8_t,poly8x8_t,Population count per byte
+TRUE,vcnt_s8,a: int8x8_t,int8x8_t,Population count per byte
+TRUE,vcnt_u8,a: uint8x8_t,uint8x8_t,Population count per byte
+TRUE,vcntq_p8,a: poly8x16_t,poly8x16_t,Population count per byte
+TRUE,vcntq_s8,a: int8x16_t,int8x16_t,Population count per byte
+TRUE,vcntq_u8,a: uint8x16_t,uint8x16_t,Population count per byte
+FALSE,vcombine_bf16,"low: bfloat16x4_t, high: bfloat16x4_t",bfloat16x8_t,Insert vector element from another vector element
+FALSE,vcombine_f16,"low: float16x4_t, high: float16x4_t",float16x8_t,Join two smaller vectors into a single larger vector
+TRUE,vcombine_f32,"low: float32x2_t, high: float32x2_t",float32x4_t,Join two smaller vectors into a single larger vector
+TRUE,vcombine_f64,"low: float64x1_t, high: float64x1_t",float64x2_t,Join two smaller vectors into a single larger vector
+TRUE,vcombine_p16,"low: poly16x4_t, high: poly16x4_t",poly16x8_t,Join two smaller vectors into a single larger vector
+TRUE,vcombine_p64,"low: poly64x1_t, high: poly64x1_t",poly64x2_t,Join two smaller vectors into a single larger vector
+TRUE,vcombine_p8,"low: poly8x8_t, high: poly8x8_t",poly8x16_t,Join two smaller vectors into a single larger vector
+TRUE,vcombine_s16,"low: int16x4_t, high: int16x4_t",int16x8_t,Join two smaller vectors into a single larger vector
+TRUE,vcombine_s32,"low: int32x2_t, high: int32x2_t",int32x4_t,Join two smaller vectors into a single larger vector
+TRUE,vcombine_s64,"low: int64x1_t, high: int64x1_t",int64x2_t,Join two smaller vectors into a single larger vector
+TRUE,vcombine_s8,"low: int8x8_t, high: int8x8_t",int8x16_t,Join two smaller vectors into a single larger vector
+TRUE,vcombine_u16,"low: uint16x4_t, high: uint16x4_t",uint16x8_t,Join two smaller vectors into a single larger vector
+TRUE,vcombine_u32,"low: uint32x2_t, high: uint32x2_t",uint32x4_t,Join two smaller vectors into a single larger vector
+TRUE,vcombine_u64,"low: uint64x1_t, high: uint64x1_t",uint64x2_t,Join two smaller vectors into a single larger vector
+TRUE,vcombine_u8,"low: uint8x8_t, high: uint8x8_t",uint8x16_t,Join two smaller vectors into a single larger vector
+FALSE,vcopy_lane_bf16,"a: bfloat16x4_t, lane1: const int, b: bfloat16x4_t, lane2: const int",bfloat16x4_t,Insert vector element from another vector element
+FALSE,vcopy_lane_f32,"a: float32x2_t, lane1: const int, b: float32x2_t, lane2: const int",float32x2_t,Insert vector element from another vector element
+FALSE,vcopy_lane_f64,"a: float64x1_t, lane1: const int, b: float64x1_t, lane2: const int",float64x1_t,Duplicate vector element to vector or scalar
+FALSE,vcopy_lane_p16,"a: poly16x4_t, lane1: const int, b: poly16x4_t, lane2: const int",poly16x4_t,Insert vector element from another vector element
+FALSE,vcopy_lane_p64,"a: poly64x1_t, lane1: const int, b: poly64x1_t, lane2: const int",poly64x1_t,Duplicate vector element to vector or scalar
+FALSE,vcopy_lane_p8,"a: poly8x8_t, lane1: const int, b: poly8x8_t, lane2: const int",poly8x8_t,Insert vector element from another vector element
+FALSE,vcopy_lane_s16,"a: int16x4_t, lane1: const int, b: int16x4_t, lane2: const int",int16x4_t,Insert vector element from another vector element
+FALSE,vcopy_lane_s32,"a: int32x2_t, lane1: const int, b: int32x2_t, lane2: const int",int32x2_t,Insert vector element from another vector element
+FALSE,vcopy_lane_s64,"a: int64x1_t, lane1: const int, b: int64x1_t, lane2: const int",int64x1_t,Duplicate vector element to vector or scalar
+FALSE,vcopy_lane_s8,"a: int8x8_t, lane1: const int, b: int8x8_t, lane2: const int",int8x8_t,Insert vector element from another vector element
+FALSE,vcopy_lane_u16,"a: uint16x4_t, lane1: const int, b: uint16x4_t, lane2: const int",uint16x4_t,Insert vector element from another vector element
+FALSE,vcopy_lane_u32,"a: uint32x2_t, lane1: const int, b: uint32x2_t, lane2: const int",uint32x2_t,Insert vector element from another vector element
+FALSE,vcopy_lane_u64,"a: uint64x1_t, lane1: const int, b: uint64x1_t, lane2: const int",uint64x1_t,Duplicate vector element to vector or scalar
+FALSE,vcopy_lane_u8,"a: uint8x8_t, lane1: const int, b: uint8x8_t, lane2: const int",uint8x8_t,Insert vector element from another vector element
+FALSE,vcopy_laneq_bf16,"a: bfloat16x4_t, lane1: const int, b: bfloat16x8_t, lane2: const int",bfloat16x4_t,Insert vector element from another vector element
+FALSE,vcopy_laneq_f32,"a: float32x2_t, lane1: const int, b: float32x4_t, lane2: const int",float32x2_t,Insert vector element from another vector element
+FALSE,vcopy_laneq_f64,"a: float64x1_t, lane1: const int, b: float64x2_t, lane2: const int",float64x1_t,Duplicate vector element to vector or scalar
+FALSE,vcopy_laneq_p16,"a: poly16x4_t, lane1: const int, b: poly16x8_t, lane2: const int",poly16x4_t,Insert vector element from another vector element
+FALSE,vcopy_laneq_p64,"a: poly64x1_t, lane1: const int, b: poly64x2_t, lane2: const int",poly64x1_t,Duplicate vector element to vector or scalar
+FALSE,vcopy_laneq_p8,"a: poly8x8_t, lane1: const int, b: poly8x16_t, lane2: const int",poly8x8_t,Insert vector element from another vector element
+FALSE,vcopy_laneq_s16,"a: int16x4_t, lane1: const int, b: int16x8_t, lane2: const int",int16x4_t,Insert vector element from another vector element
+FALSE,vcopy_laneq_s32,"a: int32x2_t, lane1: const int, b: int32x4_t, lane2: const int",int32x2_t,Insert vector element from another vector element
+FALSE,vcopy_laneq_s64,"a: int64x1_t, lane1: const int, b: int64x2_t, lane2: const int",int64x1_t,Duplicate vector element to vector or scalar
+FALSE,vcopy_laneq_s8,"a: int8x8_t, lane1: const int, b: int8x16_t, lane2: const int",int8x8_t,Insert vector element from another vector element
+FALSE,vcopy_laneq_u16,"a: uint16x4_t, lane1: const int, b: uint16x8_t, lane2: const int",uint16x4_t,Insert vector element from another vector element
+FALSE,vcopy_laneq_u32,"a: uint32x2_t, lane1: const int, b: uint32x4_t, lane2: const int",uint32x2_t,Insert vector element from another vector element
+FALSE,vcopy_laneq_u64,"a: uint64x1_t, lane1: const int, b: uint64x2_t, lane2: const int",uint64x1_t,Duplicate vector element to vector or scalar
+FALSE,vcopy_laneq_u8,"a: uint8x8_t, lane1: const int, b: uint8x16_t, lane2: const int",uint8x8_t,Insert vector element from another vector element
+FALSE,vcopyq_lane_bf16,"a: bfloat16x8_t, lane1: const int, b: bfloat16x4_t, lane2: const int",bfloat16x8_t,Insert vector element from another vector element
+FALSE,vcopyq_lane_f32,"a: float32x4_t, lane1: const int, b: float32x2_t, lane2: const int",float32x4_t,Insert vector element from another vector element
+FALSE,vcopyq_lane_f64,"a: float64x2_t, lane1: const int, b: float64x1_t, lane2: const int",float64x2_t,Insert vector element from another vector element
+FALSE,vcopyq_lane_p16,"a: poly16x8_t, lane1: const int, b: poly16x4_t, lane2: const int",poly16x8_t,Insert vector element from another vector element
+FALSE,vcopyq_lane_p64,"a: poly64x2_t, lane1: const int, b: poly64x1_t, lane2: const int",poly64x2_t,Insert vector element from another vector element
+FALSE,vcopyq_lane_p8,"a: poly8x16_t, lane1: const int, b: poly8x8_t, lane2: const int",poly8x16_t,Insert vector element from another vector element
+FALSE,vcopyq_lane_s16,"a: int16x8_t, lane1: const int, b: int16x4_t, lane2: const int",int16x8_t,Insert vector element from another vector element
+FALSE,vcopyq_lane_s32,"a: int32x4_t, lane1: const int, b: int32x2_t, lane2: const int",int32x4_t,Insert vector element from another vector element
+FALSE,vcopyq_lane_s64,"a: int64x2_t, lane1: const int, b: int64x1_t, lane2: const int",int64x2_t,Insert vector element from another vector element
+FALSE,vcopyq_lane_s8,"a: int8x16_t, lane1: const int, b: int8x8_t, lane2: const int",int8x16_t,Insert vector element from another vector element
+FALSE,vcopyq_lane_u16,"a: uint16x8_t, lane1: const int, b: uint16x4_t, lane2: const int",uint16x8_t,Insert vector element from another vector element
+FALSE,vcopyq_lane_u32,"a: uint32x4_t, lane1: const int, b: uint32x2_t, lane2: const int",uint32x4_t,Insert vector element from another vector element
+FALSE,vcopyq_lane_u64,"a: uint64x2_t, lane1: const int, b: uint64x1_t, lane2: const int",uint64x2_t,Insert vector element from another vector element
+FALSE,vcopyq_lane_u8,"a: uint8x16_t, lane1: const int, b: uint8x8_t, lane2: const int",uint8x16_t,Insert vector element from another vector element
+FALSE,vcopyq_laneq_bf16,"a: bfloat16x8_t, lane1: const int, b: bfloat16x8_t, lane2: const int",bfloat16x8_t,Insert vector element from another vector element
+FALSE,vcopyq_laneq_f32,"a: float32x4_t, lane1: const int, b: float32x4_t, lane2: const int",float32x4_t,Insert vector element from another vector element
+FALSE,vcopyq_laneq_f64,"a: float64x2_t, lane1: const int, b: float64x2_t, lane2: const int",float64x2_t,Insert vector element from another vector element
+FALSE,vcopyq_laneq_p16,"a: poly16x8_t, lane1: const int, b: poly16x8_t, lane2: const int",poly16x8_t,Insert vector element from another vector element
+FALSE,vcopyq_laneq_p64,"a: poly64x2_t, lane1: const int, b: poly64x2_t, lane2: const int",poly64x2_t,Insert vector element from another vector element
+FALSE,vcopyq_laneq_p8,"a: poly8x16_t, lane1: const int, b: poly8x16_t, lane2: const int",poly8x16_t,Insert vector element from another vector element
+FALSE,vcopyq_laneq_s16,"a: int16x8_t, lane1: const int, b: int16x8_t, lane2: const int",int16x8_t,Insert vector element from another vector element
+FALSE,vcopyq_laneq_s32,"a: int32x4_t, lane1: const int, b: int32x4_t, lane2: const int",int32x4_t,Insert vector element from another vector element
+FALSE,vcopyq_laneq_s64,"a: int64x2_t, lane1: const int, b: int64x2_t, lane2: const int",int64x2_t,Insert vector element from another vector element
+FALSE,vcopyq_laneq_s8,"a: int8x16_t, lane1: const int, b: int8x16_t, lane2: const int",int8x16_t,Insert vector element from another vector element
+FALSE,vcopyq_laneq_u16,"a: uint16x8_t, lane1: const int, b: uint16x8_t, lane2: const int",uint16x8_t,Insert vector element from another vector element
+FALSE,vcopyq_laneq_u32,"a: uint32x4_t, lane1: const int, b: uint32x4_t, lane2: const int",uint32x4_t,Insert vector element from another vector element
+FALSE,vcopyq_laneq_u64,"a: uint64x2_t, lane1: const int, b: uint64x2_t, lane2: const int",uint64x2_t,Insert vector element from another vector element
+FALSE,vcopyq_laneq_u8,"a: uint8x16_t, lane1: const int, b: uint8x16_t, lane2: const int",uint8x16_t,Insert vector element from another vector element
+FALSE,vcreate_bf16,a: u64,bfloat16x4_t,Insert vector element from another vector element
+FALSE,vcreate_f16,a: u64,float16x4_t,Insert vector element from another vector element
+FALSE,vcreate_f32,a: u64,float32x2_t,Insert vector element from another vector element
+FALSE,vcreate_f64,a: u64,float64x1_t,Insert vector element from another vector element
+FALSE,vcreate_p16,a: u64,poly16x4_t,Insert vector element from another vector element
+FALSE,vcreate_p64,a: u64,poly64x1_t,Insert vector element from another vector element
+FALSE,vcreate_p8,a: u64,poly8x8_t,Insert vector element from another vector element
+FALSE,vcreate_s16,a: u64,int16x4_t,Insert vector element from another vector element
+FALSE,vcreate_s32,a: u64,int32x2_t,Insert vector element from another vector element
+FALSE,vcreate_s64,a: u64,int64x1_t,Insert vector element from another vector element
+FALSE,vcreate_s8,a: u64,int8x8_t,Insert vector element from another vector element
+FALSE,vcreate_u16,a: u64,uint16x4_t,Insert vector element from another vector element
+FALSE,vcreate_u32,a: u64,uint32x2_t,Insert vector element from another vector element
+FALSE,vcreate_u64,a: u64,uint64x1_t,Insert vector element from another vector element
+FALSE,vcreate_u8,a: u64,uint8x8_t,Insert vector element from another vector element
+FALSE,vcvt_f16_f32,a: float32x4_t,float16x4_t,Floating-point convert to lower precision narrow
+FALSE,vcvt_f16_s16,a: int16x4_t,float16x4_t,Signed fixed-point convert to floating-point
+FALSE,vcvt_f16_u16,a: uint16x4_t,float16x4_t,Unsigned fixed-point convert to floating-point
+FALSE,vcvt_f32_bf16,a: bfloat16x4_t,float32x4_t,Shift left long
+FALSE,vcvt_f32_f16,a: float16x4_t,float32x4_t,Floating-point convert to higher precision long
+TRUE,vcvt_f32_f64,a: float64x2_t,float32x2_t,Floating-point convert to lower precision narrow
+FALSE,vcvt_f32_s32,a: int32x2_t,float32x2_t,Signed fixed-point convert to floating-point
+FALSE,vcvt_f32_u32,a: uint32x2_t,float32x2_t,Unsigned fixed-point convert to floating-point
+TRUE,vcvt_f64_f32,a: float32x2_t,float64x2_t,Floating-point convert to higher precision long
+FALSE,vcvt_f64_s64,a: int64x1_t,float64x1_t,Signed fixed-point convert to floating-point
+FALSE,vcvt_f64_u64,a: uint64x1_t,float64x1_t,Unsigned fixed-point convert to floating-point
+FALSE,vcvt_high_bf16_f32,"inactive: bfloat16x4_t, a: float32x4_t",bfloat16x4_t,Floating-point convert from single-precision to bfloat16 format
+FALSE,vcvt_high_f16_f32,"r: float16x4_t, a: float32x4_t",float16x8_t,Floating-point convert to lower precision narrow
+FALSE,vcvt_high_f32_f16,a: float16x8_t,float32x4_t,Floating-point convert to higher precision long
+TRUE,vcvt_high_f32_f64,"r: float32x2_t, a: float64x2_t",float32x4_t,Floating-point convert to lower precision narrow
+TRUE,vcvt_high_f64_f32,a: float32x4_t,float64x2_t,Floating-point convert to higher precision long
+FALSE,vcvt_low_bf16_f32,a: float32x4_t,bfloat16x4_t,Floating-point convert from single-precision to bfloat16 format
+FALSE,vcvt_n_f16_s16,"a: int16x4_t, n: const int",float16x4_t,Signed fixed-point convert to floating-point
+FALSE,vcvt_n_f16_u16,"a: uint16x4_t, n: const int",float16x4_t,Unsigned fixed-point convert to floating-point
+FALSE,vcvt_n_f32_s32,"a: int32x2_t, n: const int",float32x2_t,Signed fixed-point convert to floating-point
+FALSE,vcvt_n_f32_u32,"a: uint32x2_t, n: const int",float32x2_t,Unsigned fixed-point convert to floating-point
+FALSE,vcvt_n_f64_s64,"a: int64x1_t, n: const int",float64x1_t,Signed fixed-point convert to floating-point
+FALSE,vcvt_n_f64_u64,"a: uint64x1_t, n: const int",float64x1_t,Unsigned fixed-point convert to floating-point
+FALSE,vcvt_n_s16_f16,"a: float16x4_t, n: const int",int16x4_t,"Floating-point convert to signed fixed-point, rounding toward zero"
+FALSE,vcvt_n_s32_f32,"a: float32x2_t, n: const int",int32x2_t,"Floating-point convert to signed fixed-point, rounding toward zero"
+FALSE,vcvt_n_s64_f64,"a: float64x1_t, n: const int",int64x1_t,"Floating-point convert to signed fixed-point, rounding toward zero"
+FALSE,vcvt_n_u16_f16,"a: float16x4_t, n: const int",uint16x4_t,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+FALSE,vcvt_n_u32_f32,"a: float32x2_t, n: const int",uint32x2_t,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+FALSE,vcvt_n_u64_f64,"a: float64x1_t, n: const int",uint64x1_t,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+FALSE,vcvt_s16_f16,a: float16x4_t,int16x4_t,"Floating-point convert to signed fixed-point, rounding toward zero"
+TRUE,vcvt_s32_f32,a: float32x2_t,int32x2_t,"Floating-point convert to signed fixed-point, rounding toward zero"
+TRUE,vcvt_s64_f64,a: float64x1_t,int64x1_t,"Floating-point convert to signed fixed-point, rounding toward zero"
+FALSE,vcvt_u16_f16,a: float16x4_t,uint16x4_t,"Floating-point convert to signed fixed-point, rounding toward zero"
+TRUE,vcvt_u32_f32,a: float32x2_t,uint32x2_t,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+TRUE,vcvt_u64_f64,a: float64x1_t,uint64x1_t,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+FALSE,vcvta_s16_f16,a: float16x4_t,int16x4_t,"Floating-point convert to signed integer, rounding to nearest with ties to away"
+TRUE,vcvta_s32_f32,a: float32x2_t,int32x2_t,"Floating-point convert to signed integer, rounding to nearest with ties to away"
+TRUE,vcvta_s64_f64,a: float64x1_t,int64x1_t,"Floating-point convert to signed integer, rounding to nearest with ties to away"
+FALSE,vcvta_u16_f16,a: float16x4_t,uint16x4_t,"Floating-point convert to unsigned integer, rounding to nearest with ties to away"
+TRUE,vcvta_u32_f32,a: float32x2_t,uint32x2_t,"Floating-point convert to unsigned integer, rounding to nearest with ties to away"
+TRUE,vcvta_u64_f64,a: float64x1_t,uint64x1_t,"Floating-point convert to unsigned integer, rounding to nearest with ties to away"
+FALSE,vcvtad_s64_f64,a: float64_t,i64,"Floating-point convert to signed integer, rounding to nearest with ties to away"
+FALSE,vcvtad_u64_f64,a: float64_t,u64,"Floating-point convert to unsigned integer, rounding to nearest with ties to away"
+FALSE,vcvtah_f32_bf16,a: bfloat16_t,f32,Shift left
+FALSE,vcvtah_s16_f16,a: float16_t,i16,"Floating-point convert to signed integer, rounding to nearest with ties to away"
+FALSE,vcvtah_s32_f16,a: float16_t,i32,"Floating-point convert to signed integer, rounding to nearest with ties to away"
+FALSE,vcvtah_s64_f16,a: float16_t,i64,"Floating-point convert to signed integer, rounding to nearest with ties to away"
+FALSE,vcvtah_u16_f16,a: float16_t,u16,"Floating-point convert to unsigned integer, rounding to nearest with ties to away"
+FALSE,vcvtah_u32_f16,a: float16_t,u32,"Floating-point convert to unsigned integer, rounding to nearest with ties to away"
+FALSE,vcvtah_u64_f16,a: float16_t,u64,"Floating-point convert to unsigned integer, rounding to nearest with ties to away"
+FALSE,vcvtaq_s16_f16,a: float16x8_t,int16x8_t,"Floating-point convert to signed integer, rounding to nearest with ties to away"
+TRUE,vcvtaq_s32_f32,a: float32x4_t,int32x4_t,"Floating-point convert to signed integer, rounding to nearest with ties to away"
+TRUE,vcvtaq_s64_f64,a: float64x2_t,int64x2_t,"Floating-point convert to signed integer, rounding to nearest with ties to away"
+FALSE,vcvtaq_u16_f16,a: float16x8_t,uint16x8_t,"Floating-point convert to unsigned integer, rounding to nearest with ties to away"
+TRUE,vcvtaq_u32_f32,a: float32x4_t,uint32x4_t,"Floating-point convert to unsigned integer, rounding to nearest with ties to away"
+TRUE,vcvtaq_u64_f64,a: float64x2_t,uint64x2_t,"Floating-point convert to unsigned integer, rounding to nearest with ties to away"
+FALSE,vcvtas_s32_f32,a: f32,i32,"Floating-point convert to signed integer, rounding to nearest with ties to away"
+FALSE,vcvtas_u32_f32,a: f32,u32,"Floating-point convert to unsigned integer, rounding to nearest with ties to away"
+FALSE,vcvtd_f64_s64,a: i64,float64_t,Signed fixed-point convert to floating-point
+FALSE,vcvtd_f64_u64,a: u64,float64_t,Unsigned fixed-point convert to floating-point
+FALSE,vcvtd_n_f64_s64,"a: i64, n: const int",float64_t,Signed fixed-point convert to floating-point
+FALSE,vcvtd_n_f64_u64,"a: u64, n: const int",float64_t,Unsigned fixed-point convert to floating-point
+FALSE,vcvtd_n_s64_f64,"a: float64_t, n: const int",i64,"Floating-point convert to signed fixed-point, rounding toward zero"
+FALSE,vcvtd_n_u64_f64,"a: float64_t, n: const int",u64,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+FALSE,vcvtd_s64_f64,a: float64_t,i64,"Floating-point convert to signed fixed-point, rounding toward zero"
+FALSE,vcvtd_u64_f64,a: float64_t,u64,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+FALSE,vcvth_bf16_f32,a: f32,bfloat16_t,Floating-point convert from single-precision to bfloat16 format
+FALSE,vcvth_f16_s16,a: i16,float16_t,Signed fixed-point convert to floating-point
+FALSE,vcvth_f16_s32,a: i32,float16_t,Signed fixed-point convert to floating-point
+FALSE,vcvth_f16_s64,a: i64,float16_t,Signed fixed-point convert to floating-point
+FALSE,vcvth_f16_u16,a: u16,float16_t,Unsigned fixed-point convert to floating-point
+FALSE,vcvth_f16_u32,a: u32,float16_t,Unsigned fixed-point convert to floating-point
+FALSE,vcvth_f16_u64,a: u64,float16_t,Unsigned fixed-point convert to floating-point
+FALSE,vcvth_n_f16_s16,"a: i16, n: const int",float16_t,Signed fixed-point convert to floating-point
+FALSE,vcvth_n_f16_s32,"a: i32, n: const int",float16_t,Signed fixed-point convert to floating-point
+FALSE,vcvth_n_f16_s64,"a: i64, n: const int",float16_t,Signed fixed-point convert to floating-point
+FALSE,vcvth_n_f16_u16,"a: u16, n: const int",float16_t,Unsigned fixed-point convert to floating-point
+FALSE,vcvth_n_f16_u32,"a: u32, n: const int",float16_t,Unsigned fixed-point convert to floating-point
+FALSE,vcvth_n_f16_u64,"a: u64, n: const int",float16_t,Unsigned fixed-point convert to floating-point
+FALSE,vcvth_n_s16_f16,"a: float16_t, n: const int",i16,"Floating-point convert to signed fixed-point, rounding toward zero"
+FALSE,vcvth_n_s32_f16,"a: float16_t, n: const int",i32,"Floating-point convert to signed fixed-point, rounding toward zero"
+FALSE,vcvth_n_s64_f16,"a: float16_t, n: const int",i64,"Floating-point convert to signed fixed-point, rounding toward zero"
+FALSE,vcvth_n_u16_f16,"a: float16_t, n: const int",u16,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+FALSE,vcvth_n_u32_f16,"a: float16_t, n: const int",u32,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+FALSE,vcvth_n_u64_f16,"a: float16_t, n: const int",u64,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+FALSE,vcvth_s16_f16,a: float16_t,i16,"Floating-point convert to signed fixed-point, rounding toward zero"
+FALSE,vcvth_s32_f16,a: float16_t,i32,"Floating-point convert to signed fixed-point, rounding toward zero"
+FALSE,vcvth_s64_f16,a: float16_t,i64,"Floating-point convert to signed fixed-point, rounding toward zero"
+FALSE,vcvth_u16_f16,a: float16_t,u16,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+FALSE,vcvth_u32_f16,a: float16_t,u32,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+FALSE,vcvth_u64_f16,a: float16_t,u64,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+FALSE,vcvtm_s16_f16,a: float16x4_t,int16x4_t,"Floating-point convert to signed integer, rounding toward minus infinity"
+TRUE,vcvtm_s32_f32,a: float32x2_t,int32x2_t,"Floating-point convert to signed integer, rounding toward minus infinity"
+TRUE,vcvtm_s64_f64,a: float64x1_t,int64x1_t,"Floating-point convert to signed integer, rounding toward minus infinity"
+FALSE,vcvtm_u16_f16,a: float16x4_t,uint16x4_t,"Floating-point convert to unsigned integer, rounding toward minus infinity"
+TRUE,vcvtm_u32_f32,a: float32x2_t,uint32x2_t,"Floating-point convert to unsigned integer, rounding toward minus infinity"
+TRUE,vcvtm_u64_f64,a: float64x1_t,uint64x1_t,"Floating-point convert to unsigned integer, rounding toward minus infinity"
+FALSE,vcvtmd_s64_f64,a: float64_t,i64,"Floating-point convert to signed integer, rounding toward minus infinity"
+FALSE,vcvtmd_u64_f64,a: float64_t,u64,"Floating-point convert to unsigned integer, rounding toward minus infinity"
+FALSE,vcvtmh_s16_f16,a: float16_t,i16,"Floating-point convert to signed integer, rounding toward minus infinity"
+FALSE,vcvtmh_s32_f16,a: float16_t,i32,"Floating-point convert to signed integer, rounding toward minus infinity"
+FALSE,vcvtmh_s64_f16,a: float16_t,i64,"Floating-point convert to signed integer, rounding toward minus infinity"
+FALSE,vcvtmh_u16_f16,a: float16_t,u16,"Floating-point convert to unsigned integer, rounding toward minus infinity"
+FALSE,vcvtmh_u32_f16,a: float16_t,u32,"Floating-point convert to unsigned integer, rounding toward minus infinity"
+FALSE,vcvtmh_u64_f16,a: float16_t,u64,"Floating-point convert to unsigned integer, rounding toward minus infinity"
+FALSE,vcvtmq_s16_f16,a: float16x8_t,int16x8_t,"Floating-point convert to signed integer, rounding toward minus infinity"
+TRUE,vcvtmq_s32_f32,a: float32x4_t,int32x4_t,"Floating-point convert to signed integer, rounding toward minus infinity"
+TRUE,vcvtmq_s64_f64,a: float64x2_t,int64x2_t,"Floating-point convert to signed integer, rounding toward minus infinity"
+FALSE,vcvtmq_u16_f16,a: float16x8_t,uint16x8_t,"Floating-point convert to unsigned integer, rounding toward minus infinity"
+TRUE,vcvtmq_u32_f32,a: float32x4_t,uint32x4_t,"Floating-point convert to unsigned integer, rounding toward minus infinity"
+TRUE,vcvtmq_u64_f64,a: float64x2_t,uint64x2_t,"Floating-point convert to unsigned integer, rounding toward minus infinity"
+FALSE,vcvtms_s32_f32,a: f32,i32,"Floating-point convert to signed integer, rounding toward minus infinity"
+FALSE,vcvtms_u32_f32,a: f32,u32,"Floating-point convert to unsigned integer, rounding toward minus infinity"
+FALSE,vcvtn_s16_f16,a: float16x4_t,int16x4_t,"Floating-point convert to signed integer, rounding to nearest with ties to even"
+TRUE,vcvtn_s32_f32,a: float32x2_t,int32x2_t,"Floating-point convert to signed integer, rounding to nearest with ties to even"
+TRUE,vcvtn_s64_f64,a: float64x1_t,int64x1_t,"Floating-point convert to signed integer, rounding to nearest with ties to even"
+FALSE,vcvtn_u16_f16,a: float16x4_t,uint16x4_t,"Floating-point convert to unsigned integer, rounding to nearest with ties to even"
+TRUE,vcvtn_u32_f32,a: float32x2_t,uint32x2_t,"Floating-point convert to unsigned integer, rounding to nearest with ties to even"
+TRUE,vcvtn_u64_f64,a: float64x1_t,uint64x1_t,"Floating-point convert to unsigned integer, rounding to nearest with ties to even"
+FALSE,vcvtnd_s64_f64,a: float64_t,i64,"Floating-point convert to signed integer, rounding to nearest with ties to even"
+FALSE,vcvtnd_u64_f64,a: float64_t,u64,"Floating-point convert to unsigned integer, rounding to nearest with ties to even"
+FALSE,vcvtnh_s16_f16,a: float16_t,i16,"Floating-point convert to signed integer, rounding to nearest with ties to even"
+FALSE,vcvtnh_s32_f16,a: float16_t,i32,"Floating-point convert to signed integer, rounding to nearest with ties to even"
+FALSE,vcvtnh_s64_f16,a: float16_t,i64,"Floating-point convert to signed integer, rounding to nearest with ties to even"
+FALSE,vcvtnh_u16_f16,a: float16_t,u16,"Floating-point convert to unsigned integer, rounding to nearest with ties to even"
+FALSE,vcvtnh_u32_f16,a: float16_t,u32,"Floating-point convert to unsigned integer, rounding to nearest with ties to even"
+FALSE,vcvtnh_u64_f16,a: float16_t,u64,"Floating-point convert to unsigned integer, rounding to nearest with ties to even"
+FALSE,vcvtnq_s16_f16,a: float16x8_t,int16x8_t,"Floating-point convert to signed integer, rounding to nearest with ties to even"
+TRUE,vcvtnq_s32_f32,a: float32x4_t,int32x4_t,"Floating-point convert to signed integer, rounding to nearest with ties to even"
+TRUE,vcvtnq_s64_f64,a: float64x2_t,int64x2_t,"Floating-point convert to signed integer, rounding to nearest with ties to even"
+FALSE,vcvtnq_u16_f16,a: float16x8_t,uint16x8_t,"Floating-point convert to unsigned integer, rounding to nearest with ties to even"
+TRUE,vcvtnq_u32_f32,a: float32x4_t,uint32x4_t,"Floating-point convert to unsigned integer, rounding to nearest with ties to even"
+TRUE,vcvtnq_u64_f64,a: float64x2_t,uint64x2_t,"Floating-point convert to unsigned integer, rounding to nearest with ties to even"
+FALSE,vcvtns_s32_f32,a: f32,i32,"Floating-point convert to signed integer, rounding to nearest with ties to even"
+FALSE,vcvtns_u32_f32,a: f32,u32,"Floating-point convert to unsigned integer, rounding to nearest with ties to even"
+FALSE,vcvtp_s16_f16,a: float16x4_t,int16x4_t,"Floating-point convert to signed integer, rounding toward plus infinity"
+TRUE,vcvtp_s32_f32,a: float32x2_t,int32x2_t,"Floating-point convert to signed integer, rounding toward plus infinity"
+TRUE,vcvtp_s64_f64,a: float64x1_t,int64x1_t,"Floating-point convert to signed integer, rounding toward plus infinity"
+FALSE,vcvtp_u16_f16,a: float16x4_t,uint16x4_t,"Floating-point convert to unsigned integer, rounding toward plus infinity"
+TRUE,vcvtp_u32_f32,a: float32x2_t,uint32x2_t,"Floating-point convert to unsigned integer, rounding toward plus infinity"
+TRUE,vcvtp_u64_f64,a: float64x1_t,uint64x1_t,"Floating-point convert to unsigned integer, rounding toward plus infinity"
+FALSE,vcvtpd_s64_f64,a: float64_t,i64,"Floating-point convert to signed integer, rounding toward plus infinity"
+FALSE,vcvtpd_u64_f64,a: float64_t,u64,"Floating-point convert to unsigned integer, rounding toward plus infinity"
+FALSE,vcvtph_s16_f16,a: float16_t,i16,"Floating-point convert to signed integer, rounding toward plus infinity"
+FALSE,vcvtph_s32_f16,a: float16_t,i32,"Floating-point convert to signed integer, rounding toward plus infinity"
+FALSE,vcvtph_s64_f16,a: float16_t,i64,"Floating-point convert to signed integer, rounding toward plus infinity"
+FALSE,vcvtph_u16_f16,a: float16_t,u16,"Floating-point convert to unsigned integer, rounding toward plus infinity"
+FALSE,vcvtph_u32_f16,a: float16_t,u32,"Floating-point convert to unsigned integer, rounding toward plus infinity"
+FALSE,vcvtph_u64_f16,a: float16_t,u64,"Floating-point convert to unsigned integer, rounding toward plus infinity"
+FALSE,vcvtpq_s16_f16,a: float16x8_t,int16x8_t,"Floating-point convert to signed integer, rounding toward plus infinity"
+TRUE,vcvtpq_s32_f32,a: float32x4_t,int32x4_t,"Floating-point convert to signed integer, rounding toward plus infinity"
+TRUE,vcvtpq_s64_f64,a: float64x2_t,int64x2_t,"Floating-point convert to signed integer, rounding toward plus infinity"
+FALSE,vcvtpq_u16_f16,a: float16x8_t,uint16x8_t,"Floating-point convert to unsigned integer, rounding toward plus infinity"
+TRUE,vcvtpq_u32_f32,a: float32x4_t,uint32x4_t,"Floating-point convert to unsigned integer, rounding toward plus infinity"
+TRUE,vcvtpq_u64_f64,a: float64x2_t,uint64x2_t,"Floating-point convert to unsigned integer, rounding toward plus infinity"
+FALSE,vcvtps_s32_f32,a: f32,i32,"Floating-point convert to signed integer, rounding toward plus infinity"
+FALSE,vcvtps_u32_f32,a: f32,u32,"Floating-point convert to unsigned integer, rounding toward plus infinity"
+FALSE,vcvtq_f16_s16,a: int16x8_t,float16x8_t,Signed fixed-point convert to floating-point
+FALSE,vcvtq_f16_u16,a: uint16x8_t,float16x8_t,Unsigned fixed-point convert to floating-point
+FALSE,vcvtq_f32_s32,a: int32x4_t,float32x4_t,Signed fixed-point convert to floating-point
+FALSE,vcvtq_f32_u32,a: uint32x4_t,float32x4_t,Unsigned fixed-point convert to floating-point
+FALSE,vcvtq_f64_s64,a: int64x2_t,float64x2_t,Signed fixed-point convert to floating-point
+FALSE,vcvtq_f64_u64,a: uint64x2_t,float64x2_t,Unsigned fixed-point convert to floating-point
+FALSE,vcvtq_high_bf16_f32,"inactive: bfloat16x8_t, a: float32x4_t",bfloat16x8_t,Floating-point convert from single-precision to bfloat16 format
+FALSE,vcvtq_high_f32_bf16,a: bfloat16x8_t,float32x4_t,Shift left long
+FALSE,vcvtq_low_bf16_f32,a: float32x4_t,bfloat16x8_t,Floating-point convert from single-precision to bfloat16 format
+FALSE,vcvtq_low_f32_bf16,a: bfloat16x8_t,float32x4_t,Shift left long
+FALSE,vcvtq_n_f16_s16,"a: int16x8_t, n: const int",float16x8_t,Signed fixed-point convert to floating-point
+FALSE,vcvtq_n_f16_u16,"a: uint16x8_t, n: const int",float16x8_t,Unsigned fixed-point convert to floating-point
+FALSE,vcvtq_n_f32_s32,"a: int32x4_t, n: const int",float32x4_t,Signed fixed-point convert to floating-point
+FALSE,vcvtq_n_f32_u32,"a: uint32x4_t, n: const int",float32x4_t,Unsigned fixed-point convert to floating-point
+FALSE,vcvtq_n_f64_s64,"a: int64x2_t, n: const int",float64x2_t,Signed fixed-point convert to floating-point
+FALSE,vcvtq_n_f64_u64,"a: uint64x2_t, n: const int",float64x2_t,Unsigned fixed-point convert to floating-point
+FALSE,vcvtq_n_s16_f16,"a: float16x8_t, n: const int",int16x8_t,"Floating-point convert to signed fixed-point, rounding toward zero"
+FALSE,vcvtq_n_s32_f32,"a: float32x4_t, n: const int",int32x4_t,"Floating-point convert to signed fixed-point, rounding toward zero"
+FALSE,vcvtq_n_s64_f64,"a: float64x2_t, n: const int",int64x2_t,"Floating-point convert to signed fixed-point, rounding toward zero"
+FALSE,vcvtq_n_u16_f16,"a: float16x8_t, n: const int",uint16x8_t,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+FALSE,vcvtq_n_u32_f32,"a: float32x4_t, n: const int",uint32x4_t,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+FALSE,vcvtq_n_u64_f64,"a: float64x2_t, n: const int",uint64x2_t,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+FALSE,vcvtq_s16_f16,a: float16x8_t,int16x8_t,"Floating-point convert to signed fixed-point, rounding toward zero"
+TRUE,vcvtq_s32_f32,a: float32x4_t,int32x4_t,"Floating-point convert to signed fixed-point, rounding toward zero"
+TRUE,vcvtq_s64_f64,a: float64x2_t,int64x2_t,"Floating-point convert to signed fixed-point, rounding toward zero"
+FALSE,vcvtq_u16_f16,a: float16x8_t,uint16x8_t,"Floating-point convert to signed fixed-point, rounding toward zero"
+TRUE,vcvtq_u32_f32,a: float32x4_t,uint32x4_t,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+TRUE,vcvtq_u64_f64,a: float64x2_t,uint64x2_t,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+FALSE,vcvts_f32_s32,a: i32,f32,Signed fixed-point convert to floating-point
+FALSE,vcvts_f32_u32,a: u32,f32,Unsigned fixed-point convert to floating-point
+FALSE,vcvts_n_f32_s32,"a: i32, n: const int",f32,Signed fixed-point convert to floating-point
+FALSE,vcvts_n_f32_u32,"a: u32, n: const int",f32,Unsigned fixed-point convert to floating-point
+FALSE,vcvts_n_s32_f32,"a: f32, n: const int",i32,"Floating-point convert to signed fixed-point, rounding toward zero"
+FALSE,vcvts_n_u32_f32,"a: f32, n: const int",u32,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+FALSE,vcvts_s32_f32,a: f32,i32,"Floating-point convert to signed fixed-point, rounding toward zero"
+FALSE,vcvts_u32_f32,a: f32,u32,"Floating-point convert to unsigned fixed-point, rounding toward zero"
+TRUE,vcvtx_f32_f64,a: float64x2_t,float32x2_t,"Floating-point convert to lower precision narrow, rounding to odd"
+TRUE,vcvtx_high_f32_f64,"r: float32x2_t, a: float64x2_t",float32x4_t,"Floating-point convert to lower precision narrow, rounding to odd"
+FALSE,vcvtxd_f32_f64,a: float64_t,f32,"Floating-point convert to lower precision narrow, rounding to odd"
+FALSE,vdiv_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point divide
+TRUE,vdiv_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point divide
+TRUE,vdiv_f64,"a: float64x1_t, b: float64x1_t",float64x1_t,Floating-point divide
+FALSE,vdivh_f16,"a: float16_t, b: float16_t",float16_t,Floating-point divide
+FALSE,vdivq_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point divide
+TRUE,vdivq_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point divide
+TRUE,vdivq_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point divide
+FALSE,vdot_lane_s32,"r: int32x2_t, a: int8x8_t, b: int8x8_t, lane: const int",int32x2_t,Dot product signed arithmetic
+FALSE,vdot_lane_u32,"r: uint32x2_t, a: uint8x8_t, b: uint8x8_t, lane: const int",uint32x2_t,Dot product unsigned arithmetic
+FALSE,vdot_laneq_s32,"r: int32x2_t, a: int8x8_t, b: int8x16_t, lane: const int",int32x2_t,Dot product signed arithmetic
+FALSE,vdot_laneq_u32,"r: uint32x2_t, a: uint8x8_t, b: uint8x16_t, lane: const int",uint32x2_t,Dot product unsigned arithmetic
+FALSE,vdot_s32,"r: int32x2_t, a: int8x8_t, b: int8x8_t",int32x2_t,Dot product signed arithmetic
+FALSE,vdot_u32,"r: uint32x2_t, a: uint8x8_t, b: uint8x8_t",uint32x2_t,Dot product unsigned arithmetic
+FALSE,vdotq_lane_s32,"r: int32x4_t, a: int8x16_t, b: int8x8_t, lane: const int",int32x4_t,Dot product signed arithmetic
+FALSE,vdotq_lane_u32,"r: uint32x4_t, a: uint8x16_t, b: uint8x8_t, lane: const int",uint32x4_t,Dot product unsigned arithmetic
+FALSE,vdotq_laneq_s32,"r: int32x4_t, a: int8x16_t, b: int8x16_t, lane: const int",int32x4_t,Dot product signed arithmetic
+FALSE,vdotq_laneq_u32,"r: uint32x4_t, a: uint8x16_t, b: uint8x16_t, lane: const int",uint32x4_t,Dot product unsigned arithmetic
+FALSE,vdotq_s32,"r: int32x4_t, a: int8x16_t, b: int8x16_t",int32x4_t,Dot product signed arithmetic
+FALSE,vdotq_u32,"r: uint32x4_t, a: uint8x16_t, b: uint8x16_t",uint32x4_t,Dot product unsigned arithmetic
+FALSE,vdup_lane_bf16,"vec: bfloat16x4_t, lane: const int",bfloat16x4_t,Duplicate vector element to vector or scalar
+FALSE,vdup_lane_f16,"vec: float16x4_t, lane: const int",float16x4_t,Set all vector lanes to the same value
+TRUE,vdup_lane_f32,"vec: float32x2_t, lane: const int",float32x2_t,Set all vector lanes to the same value
+TRUE,vdup_lane_f64,"vec: float64x1_t, lane: const int",float64x1_t,Set all vector lanes to the same value
+TRUE,vdup_lane_p16,"vec: poly16x4_t, lane: const int",poly16x4_t,Set all vector lanes to the same value
+TRUE,vdup_lane_p64,"vec: poly64x1_t, lane: const int",poly64x1_t,Set all vector lanes to the same value
+TRUE,vdup_lane_p8,"vec: poly8x8_t, lane: const int",poly8x8_t,Set all vector lanes to the same value
+TRUE,vdup_lane_s16,"vec: int16x4_t, lane: const int",int16x4_t,Set all vector lanes to the same value
+TRUE,vdup_lane_s32,"vec: int32x2_t, lane: const int",int32x2_t,Set all vector lanes to the same value
+TRUE,vdup_lane_s64,"vec: int64x1_t, lane: const int",int64x1_t,Set all vector lanes to the same value
+TRUE,vdup_lane_s8,"vec: int8x8_t, lane: const int",int8x8_t,Set all vector lanes to the same value
+TRUE,vdup_lane_u16,"vec: uint16x4_t, lane: const int",uint16x4_t,Set all vector lanes to the same value
+TRUE,vdup_lane_u32,"vec: uint32x2_t, lane: const int",uint32x2_t,Set all vector lanes to the same value
+TRUE,vdup_lane_u64,"vec: uint64x1_t, lane: const int",uint64x1_t,Set all vector lanes to the same value
+TRUE,vdup_lane_u8,"vec: uint8x8_t, lane: const int",uint8x8_t,Set all vector lanes to the same value
+FALSE,vdup_laneq_bf16,"vec: bfloat16x8_t, lane: const int",bfloat16x4_t,Duplicate vector element to vector or scalar
+FALSE,vdup_laneq_f16,"vec: float16x8_t, lane: const int",float16x4_t,Set all vector lanes to the same value
+TRUE,vdup_laneq_f32,"vec: float32x4_t, lane: const int",float32x2_t,Set all vector lanes to the same value
+TRUE,vdup_laneq_f64,"vec: float64x2_t, lane: const int",float64x1_t,Set all vector lanes to the same value
+TRUE,vdup_laneq_p16,"vec: poly16x8_t, lane: const int",poly16x4_t,Set all vector lanes to the same value
+TRUE,vdup_laneq_p64,"vec: poly64x2_t, lane: const int",poly64x1_t,Set all vector lanes to the same value
+TRUE,vdup_laneq_p8,"vec: poly8x16_t, lane: const int",poly8x8_t,Set all vector lanes to the same value
+TRUE,vdup_laneq_s16,"vec: int16x8_t, lane: const int",int16x4_t,Set all vector lanes to the same value
+TRUE,vdup_laneq_s32,"vec: int32x4_t, lane: const int",int32x2_t,Set all vector lanes to the same value
+TRUE,vdup_laneq_s64,"vec: int64x2_t, lane: const int",int64x1_t,Set all vector lanes to the same value
+TRUE,vdup_laneq_s8,"vec: int8x16_t, lane: const int",int8x8_t,Set all vector lanes to the same value
+TRUE,vdup_laneq_u16,"vec: uint16x8_t, lane: const int",uint16x4_t,Set all vector lanes to the same value
+TRUE,vdup_laneq_u32,"vec: uint32x4_t, lane: const int",uint32x2_t,Set all vector lanes to the same value
+TRUE,vdup_laneq_u64,"vec: uint64x2_t, lane: const int",uint64x1_t,Set all vector lanes to the same value
+TRUE,vdup_laneq_u8,"vec: uint8x16_t, lane: const int",uint8x8_t,Set all vector lanes to the same value
+FALSE,vdup_n_bf16,value: bfloat16_t,bfloat16x4_t,Duplicate vector element to vector or scalar
+FALSE,vdup_n_f16,value: float16_t,float16x4_t,Duplicate vector element to vector or scalar
+TRUE,vdup_n_f32,value: f32,float32x2_t,Duplicate vector element to vector or scalar
+TRUE,vdup_n_f64,value: float64_t,float64x1_t,Insert vector element from another vector element
+TRUE,vdup_n_p16,value: poly16_t,poly16x4_t,Duplicate vector element to vector or scalar
+TRUE,vdup_n_p64,value: poly64_t,poly64x1_t,Insert vector element from another vector element
+TRUE,vdup_n_p8,value: poly8_t,poly8x8_t,Duplicate vector element to vector or scalar
+TRUE,vdup_n_s16,value: i16,int16x4_t,Duplicate vector element to vector or scalar
+TRUE,vdup_n_s32,value: i32,int32x2_t,Duplicate vector element to vector or scalar
+TRUE,vdup_n_s64,value: i64,int64x1_t,Insert vector element from another vector element
+TRUE,vdup_n_s8,value: i8,int8x8_t,Duplicate vector element to vector or scalar
+TRUE,vdup_n_u16,value: u16,uint16x4_t,Duplicate vector element to vector or scalar
+TRUE,vdup_n_u32,value: u32,uint32x2_t,Duplicate vector element to vector or scalar
+TRUE,vdup_n_u64,value: u64,uint64x1_t,Insert vector element from another vector element
+TRUE,vdup_n_u8,value: u8,uint8x8_t,Duplicate vector element to vector or scalar
+TRUE,vdupb_lane_p8,"vec: poly8x8_t, lane: const int",poly8_t,Set all vector lanes to the same value
+TRUE,vdupb_lane_s8,"vec: int8x8_t, lane: const int",i8,Set all vector lanes to the same value
+TRUE,vdupb_lane_u8,"vec: uint8x8_t, lane: const int",u8,Set all vector lanes to the same value
+TRUE,vdupb_laneq_p8,"vec: poly8x16_t, lane: const int",poly8_t,Set all vector lanes to the same value
+TRUE,vdupb_laneq_s8,"vec: int8x16_t, lane: const int",i8,Set all vector lanes to the same value
+TRUE,vdupb_laneq_u8,"vec: uint8x16_t, lane: const int",u8,Set all vector lanes to the same value
+TRUE,vdupd_lane_f64,"vec: float64x1_t, lane: const int",float64_t,Set all vector lanes to the same value
+TRUE,vdupd_lane_s64,"vec: int64x1_t, lane: const int",i64,Set all vector lanes to the same value
+TRUE,vdupd_lane_u64,"vec: uint64x1_t, lane: const int",u64,Set all vector lanes to the same value
+TRUE,vdupd_laneq_f64,"vec: float64x2_t, lane: const int",float64_t,Set all vector lanes to the same value
+TRUE,vdupd_laneq_s64,"vec: int64x2_t, lane: const int",i64,Set all vector lanes to the same value
+TRUE,vdupd_laneq_u64,"vec: uint64x2_t, lane: const int",u64,Set all vector lanes to the same value
+FALSE,vduph_lane_bf16,"vec: bfloat16x4_t, lane: const int",bfloat16_t,Duplicate vector element to vector or scalar
+FALSE,vduph_lane_f16,"vec: float16x4_t, lane: const int",float16_t,Set all vector lanes to the same value
+TRUE,vduph_lane_p16,"vec: poly16x4_t, lane: const int",poly16_t,Set all vector lanes to the same value
+TRUE,vduph_lane_s16,"vec: int16x4_t, lane: const int",i16,Set all vector lanes to the same value
+TRUE,vduph_lane_u16,"vec: uint16x4_t, lane: const int",u16,Set all vector lanes to the same value
+FALSE,vduph_laneq_bf16,"vec: bfloat16x8_t, lane: const int",bfloat16_t,Duplicate vector element to vector or scalar
+FALSE,vduph_laneq_f16,"vec: float16x8_t, lane: const int",float16_t,Set all vector lanes to the same value
+TRUE,vduph_laneq_p16,"vec: poly16x8_t, lane: const int",poly16_t,Set all vector lanes to the same value
+TRUE,vduph_laneq_s16,"vec: int16x8_t, lane: const int",i16,Set all vector lanes to the same value
+TRUE,vduph_laneq_u16,"vec: uint16x8_t, lane: const int",u16,Set all vector lanes to the same value
+FALSE,vdupq_lane_bf16,"vec: bfloat16x4_t, lane: const int",bfloat16x8_t,Duplicate vector element to vector or scalar
+FALSE,vdupq_lane_f16,"vec: float16x4_t, lane: const int",float16x8_t,Set all vector lanes to the same value
+TRUE,vdupq_lane_f32,"vec: float32x2_t, lane: const int",float32x4_t,Set all vector lanes to the same value
+TRUE,vdupq_lane_f64,"vec: float64x1_t, lane: const int",float64x2_t,Set all vector lanes to the same value
+TRUE,vdupq_lane_p16,"vec: poly16x4_t, lane: const int",poly16x8_t,Set all vector lanes to the same value
+TRUE,vdupq_lane_p64,"vec: poly64x1_t, lane: const int",poly64x2_t,Set all vector lanes to the same value
+TRUE,vdupq_lane_p8,"vec: poly8x8_t, lane: const int",poly8x16_t,Set all vector lanes to the same value
+TRUE,vdupq_lane_s16,"vec: int16x4_t, lane: const int",int16x8_t,Set all vector lanes to the same value
+TRUE,vdupq_lane_s32,"vec: int32x2_t, lane: const int",int32x4_t,Set all vector lanes to the same value
+TRUE,vdupq_lane_s64,"vec: int64x1_t, lane: const int",int64x2_t,Set all vector lanes to the same value
+TRUE,vdupq_lane_s8,"vec: int8x8_t, lane: const int",int8x16_t,Set all vector lanes to the same value
+TRUE,vdupq_lane_u16,"vec: uint16x4_t, lane: const int",uint16x8_t,Set all vector lanes to the same value
+TRUE,vdupq_lane_u32,"vec: uint32x2_t, lane: const int",uint32x4_t,Set all vector lanes to the same value
+TRUE,vdupq_lane_u64,"vec: uint64x1_t, lane: const int",uint64x2_t,Set all vector lanes to the same value
+TRUE,vdupq_lane_u8,"vec: uint8x8_t, lane: const int",uint8x16_t,Set all vector lanes to the same value
+FALSE,vdupq_laneq_bf16,"vec: bfloat16x8_t, lane: const int",bfloat16x8_t,Duplicate vector element to vector or scalar
+FALSE,vdupq_laneq_f16,"vec: float16x8_t, lane: const int",float16x8_t,Set all vector lanes to the same value
+TRUE,vdupq_laneq_f32,"vec: float32x4_t, lane: const int",float32x4_t,Set all vector lanes to the same value
+TRUE,vdupq_laneq_f64,"vec: float64x2_t, lane: const int",float64x2_t,Set all vector lanes to the same value
+TRUE,vdupq_laneq_p16,"vec: poly16x8_t, lane: const int",poly16x8_t,Set all vector lanes to the same value
+TRUE,vdupq_laneq_p64,"vec: poly64x2_t, lane: const int",poly64x2_t,Set all vector lanes to the same value
+TRUE,vdupq_laneq_p8,"vec: poly8x16_t, lane: const int",poly8x16_t,Set all vector lanes to the same value
+TRUE,vdupq_laneq_s16,"vec: int16x8_t, lane: const int",int16x8_t,Set all vector lanes to the same value
+TRUE,vdupq_laneq_s32,"vec: int32x4_t, lane: const int",int32x4_t,Set all vector lanes to the same value
+TRUE,vdupq_laneq_s64,"vec: int64x2_t, lane: const int",int64x2_t,Set all vector lanes to the same value
+TRUE,vdupq_laneq_s8,"vec: int8x16_t, lane: const int",int8x16_t,Set all vector lanes to the same value
+TRUE,vdupq_laneq_u16,"vec: uint16x8_t, lane: const int",uint16x8_t,Set all vector lanes to the same value
+TRUE,vdupq_laneq_u32,"vec: uint32x4_t, lane: const int",uint32x4_t,Set all vector lanes to the same value
+TRUE,vdupq_laneq_u64,"vec: uint64x2_t, lane: const int",uint64x2_t,Set all vector lanes to the same value
+TRUE,vdupq_laneq_u8,"vec: uint8x16_t, lane: const int",uint8x16_t,Set all vector lanes to the same value
+FALSE,vdupq_n_bf16,value: bfloat16_t,bfloat16x8_t,Duplicate vector element to vector or scalar
+FALSE,vdupq_n_f16,value: float16_t,float16x8_t,Duplicate vector element to vector or scalar
+TRUE,vdupq_n_f32,value: f32,float32x4_t,Duplicate vector element to vector or scalar
+TRUE,vdupq_n_f64,value: float64_t,float64x2_t,Duplicate vector element to vector or scalar
+TRUE,vdupq_n_p16,value: poly16_t,poly16x8_t,Duplicate vector element to vector or scalar
+TRUE,vdupq_n_p64,value: poly64_t,poly64x2_t,Duplicate vector element to vector or scalar
+TRUE,vdupq_n_p8,value: poly8_t,poly8x16_t,Duplicate vector element to vector or scalar
+TRUE,vdupq_n_s16,value: i16,int16x8_t,Duplicate vector element to vector or scalar
+TRUE,vdupq_n_s32,value: i32,int32x4_t,Duplicate vector element to vector or scalar
+TRUE,vdupq_n_s64,value: i64,int64x2_t,Duplicate vector element to vector or scalar
+TRUE,vdupq_n_s8,value: i8,int8x16_t,Duplicate vector element to vector or scalar
+TRUE,vdupq_n_u16,value: u16,uint16x8_t,Duplicate vector element to vector or scalar
+TRUE,vdupq_n_u32,value: u32,uint32x4_t,Duplicate vector element to vector or scalar
+TRUE,vdupq_n_u64,value: u64,uint64x2_t,Duplicate vector element to vector or scalar
+TRUE,vdupq_n_u8,value: u8,uint8x16_t,Duplicate vector element to vector or scalar
+TRUE,vdups_lane_f32,"vec: float32x2_t, lane: const int",f32,Set all vector lanes to the same value
+TRUE,vdups_lane_s32,"vec: int32x2_t, lane: const int",i32,Set all vector lanes to the same value
+TRUE,vdups_lane_u32,"vec: uint32x2_t, lane: const int",u32,Set all vector lanes to the same value
+TRUE,vdups_laneq_f32,"vec: float32x4_t, lane: const int",f32,Set all vector lanes to the same value
+TRUE,vdups_laneq_s32,"vec: int32x4_t, lane: const int",i32,Set all vector lanes to the same value
+TRUE,vdups_laneq_u32,"vec: uint32x4_t, lane: const int",u32,Set all vector lanes to the same value
+TRUE,veor_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Bitwise exclusive OR
+TRUE,veor_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Bitwise exclusive OR
+TRUE,veor_s64,"a: int64x1_t, b: int64x1_t",int64x1_t,Bitwise exclusive OR
+TRUE,veor_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Bitwise exclusive OR
+TRUE,veor_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Bitwise exclusive OR
+TRUE,veor_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Bitwise exclusive OR
+TRUE,veor_u64,"a: uint64x1_t, b: uint64x1_t",uint64x1_t,Bitwise exclusive OR
+TRUE,veor_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Bitwise exclusive OR
+FALSE,veor3q_s16,"a: int16x8_t, b: int16x8_t, c: int16x8_t",int16x8_t,Three-way exclusive OR
+FALSE,veor3q_s32,"a: int32x4_t, b: int32x4_t, c: int32x4_t",int32x4_t,Three-way exclusive OR
+FALSE,veor3q_s64,"a: int64x2_t, b: int64x2_t, c: int64x2_t",int64x2_t,Three-way exclusive OR
+FALSE,veor3q_s8,"a: int8x16_t, b: int8x16_t, c: int8x16_t",int8x16_t,Three-way exclusive OR
+FALSE,veor3q_u16,"a: uint16x8_t, b: uint16x8_t, c: uint16x8_t",uint16x8_t,Three-way exclusive OR
+FALSE,veor3q_u32,"a: uint32x4_t, b: uint32x4_t, c: uint32x4_t",uint32x4_t,Three-way exclusive OR
+FALSE,veor3q_u64,"a: uint64x2_t, b: uint64x2_t, c: uint64x2_t",uint64x2_t,Three-way exclusive OR
+FALSE,veor3q_u8,"a: uint8x16_t, b: uint8x16_t, c: uint8x16_t",uint8x16_t,Three-way exclusive OR
+TRUE,veorq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Bitwise exclusive OR
+TRUE,veorq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Bitwise exclusive OR
+TRUE,veorq_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,Bitwise exclusive OR
+TRUE,veorq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Bitwise exclusive OR
+TRUE,veorq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Bitwise exclusive OR
+TRUE,veorq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Bitwise exclusive OR
+TRUE,veorq_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Bitwise exclusive OR
+TRUE,veorq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Bitwise exclusive OR
+FALSE,vext_f16,"a: float16x4_t, b: float16x4_t, n: const int",float16x4_t,Extract vector from pair of vectors
+TRUE,vext_f32,"a: float32x2_t, b: float32x2_t, n: const int",float32x2_t,Extract vector from pair of vectors
+TRUE,vext_f64,"a: float64x1_t, b: float64x1_t, n: const int",float64x1_t,Extract vector from pair of vectors
+TRUE,vext_p16,"a: poly16x4_t, b: poly16x4_t, n: const int",poly16x4_t,Extract vector from pair of vectors
+TRUE,vext_p64,"a: poly64x1_t, b: poly64x1_t, n: const int",poly64x1_t,Extract vector from pair of vectors
+TRUE,vext_p8,"a: poly8x8_t, b: poly8x8_t, n: const int",poly8x8_t,Extract vector from pair of vectors
+TRUE,vext_s16,"a: int16x4_t, b: int16x4_t, n: const int",int16x4_t,Extract vector from pair of vectors
+TRUE,vext_s32,"a: int32x2_t, b: int32x2_t, n: const int",int32x2_t,Extract vector from pair of vectors
+TRUE,vext_s64,"a: int64x1_t, b: int64x1_t, n: const int",int64x1_t,Extract vector from pair of vectors
+TRUE,vext_s8,"a: int8x8_t, b: int8x8_t, n: const int",int8x8_t,Extract vector from pair of vectors
+TRUE,vext_u16,"a: uint16x4_t, b: uint16x4_t, n: const int",uint16x4_t,Extract vector from pair of vectors
+TRUE,vext_u32,"a: uint32x2_t, b: uint32x2_t, n: const int",uint32x2_t,Extract vector from pair of vectors
+TRUE,vext_u64,"a: uint64x1_t, b: uint64x1_t, n: const int",uint64x1_t,Extract vector from pair of vectors
+TRUE,vext_u8,"a: uint8x8_t, b: uint8x8_t, n: const int",uint8x8_t,Extract vector from pair of vectors
+FALSE,vextq_f16,"a: float16x8_t, b: float16x8_t, n: const int",float16x8_t,Extract vector from pair of vectors
+TRUE,vextq_f32,"a: float32x4_t, b: float32x4_t, n: const int",float32x4_t,Extract vector from pair of vectors
+TRUE,vextq_f64,"a: float64x2_t, b: float64x2_t, n: const int",float64x2_t,Extract vector from pair of vectors
+TRUE,vextq_p16,"a: poly16x8_t, b: poly16x8_t, n: const int",poly16x8_t,Extract vector from pair of vectors
+TRUE,vextq_p64,"a: poly64x2_t, b: poly64x2_t, n: const int",poly64x2_t,Extract vector from pair of vectors
+TRUE,vextq_p8,"a: poly8x16_t, b: poly8x16_t, n: const int",poly8x16_t,Extract vector from pair of vectors
+TRUE,vextq_s16,"a: int16x8_t, b: int16x8_t, n: const int",int16x8_t,Extract vector from pair of vectors
+TRUE,vextq_s32,"a: int32x4_t, b: int32x4_t, n: const int",int32x4_t,Extract vector from pair of vectors
+TRUE,vextq_s64,"a: int64x2_t, b: int64x2_t, n: const int",int64x2_t,Extract vector from pair of vectors
+TRUE,vextq_s8,"a: int8x16_t, b: int8x16_t, n: const int",int8x16_t,Extract vector from pair of vectors
+TRUE,vextq_u16,"a: uint16x8_t, b: uint16x8_t, n: const int",uint16x8_t,Extract vector from pair of vectors
+TRUE,vextq_u32,"a: uint32x4_t, b: uint32x4_t, n: const int",uint32x4_t,Extract vector from pair of vectors
+TRUE,vextq_u64,"a: uint64x2_t, b: uint64x2_t, n: const int",uint64x2_t,Extract vector from pair of vectors
+TRUE,vextq_u8,"a: uint8x16_t, b: uint8x16_t, n: const int",uint8x16_t,Extract vector from pair of vectors
+FALSE,vfma_f16,"a: float16x4_t, b: float16x4_t, c: float16x4_t",float16x4_t,Floating-point fused multiply-add to accumulator
+TRUE,vfma_f32,"a: float32x2_t, b: float32x2_t, c: float32x2_t",float32x2_t,Floating-point fused multiply-add to accumulator
+TRUE,vfma_f64,"a: float64x1_t, b: float64x1_t, c: float64x1_t",float64x1_t,Floating-point fused multiply-add
+FALSE,vfma_lane_f16,"a: float16x4_t, b: float16x4_t, v: float16x4_t, lane: const int",float16x4_t,Floating-point fused multiply-add to accumulator
+FALSE,vfma_lane_f32,"a: float32x2_t, b: float32x2_t, v: float32x2_t, lane: const int",float32x2_t,Floating-point fused multiply-add to accumulator
+FALSE,vfma_lane_f64,"a: float64x1_t, b: float64x1_t, v: float64x1_t, lane: const int",float64x1_t,Floating-point fused multiply-add to accumulator
+FALSE,vfma_laneq_f16,"a: float16x4_t, b: float16x4_t, v: float16x8_t, lane: const int",float16x4_t,Floating-point fused multiply-add to accumulator
+FALSE,vfma_laneq_f32,"a: float32x2_t, b: float32x2_t, v: float32x4_t, lane: const int",float32x2_t,Floating-point fused multiply-add to accumulator
+FALSE,vfma_laneq_f64,"a: float64x1_t, b: float64x1_t, v: float64x2_t, lane: const int",float64x1_t,Floating-point fused multiply-add to accumulator
+FALSE,vfma_n_f16,"a: float16x4_t, b: float16x4_t, n: float16_t",float16x4_t,Floating-point fused multiply-add to accumulator
+TRUE,vfma_n_f32,"a: float32x2_t, b: float32x2_t, n: f32",float32x2_t,Floating-point fused multiply-add to accumulator
+TRUE,vfma_n_f64,"a: float64x1_t, b: float64x1_t, n: float64_t",float64x1_t,Floating-point fused multiply-add
+FALSE,vfmad_lane_f64,"a: float64_t, b: float64_t, v: float64x1_t, lane: const int",float64_t,Floating-point fused multiply-add to accumulator
+FALSE,vfmad_laneq_f64,"a: float64_t, b: float64_t, v: float64x2_t, lane: const int",float64_t,Floating-point fused multiply-add to accumulator
+FALSE,vfmah_f16,"a: float16_t, b: float16_t, c: float16_t",float16_t,Floating-point fused multiply-add
+FALSE,vfmah_lane_f16,"a: float16_t, b: float16_t, v: float16x4_t, lane: const int",float16_t,Floating-point fused multiply-add to accumulator
+FALSE,vfmah_laneq_f16,"a: float16_t, b: float16_t, v: float16x8_t, lane: const int",float16_t,Floating-point fused multiply-add to accumulator
+FALSE,vfmaq_f16,"a: float16x8_t, b: float16x8_t, c: float16x8_t",float16x8_t,Floating-point fused multiply-add to accumulator
+TRUE,vfmaq_f32,"a: float32x4_t, b: float32x4_t, c: float32x4_t",float32x4_t,Floating-point fused multiply-add to accumulator
+TRUE,vfmaq_f64,"a: float64x2_t, b: float64x2_t, c: float64x2_t",float64x2_t,Floating-point fused multiply-add to accumulator
+FALSE,vfmaq_lane_f16,"a: float16x8_t, b: float16x8_t, v: float16x4_t, lane: const int",float16x8_t,Floating-point fused multiply-add to accumulator
+FALSE,vfmaq_lane_f32,"a: float32x4_t, b: float32x4_t, v: float32x2_t, lane: const int",float32x4_t,Floating-point fused multiply-add to accumulator
+FALSE,vfmaq_lane_f64,"a: float64x2_t, b: float64x2_t, v: float64x1_t, lane: const int",float64x2_t,Floating-point fused multiply-add to accumulator
+FALSE,vfmaq_laneq_f16,"a: float16x8_t, b: float16x8_t, v: float16x8_t, lane: const int",float16x8_t,Floating-point fused multiply-add to accumulator
+FALSE,vfmaq_laneq_f32,"a: float32x4_t, b: float32x4_t, v: float32x4_t, lane: const int",float32x4_t,Floating-point fused multiply-add to accumulator
+FALSE,vfmaq_laneq_f64,"a: float64x2_t, b: float64x2_t, v: float64x2_t, lane: const int",float64x2_t,Floating-point fused multiply-add to accumulator
+FALSE,vfmaq_n_f16,"a: float16x8_t, b: float16x8_t, n: float16_t",float16x8_t,Floating-point fused multiply-add to accumulator
+TRUE,vfmaq_n_f32,"a: float32x4_t, b: float32x4_t, n: f32",float32x4_t,Floating-point fused multiply-add to accumulator
+TRUE,vfmaq_n_f64,"a: float64x2_t, b: float64x2_t, n: float64_t",float64x2_t,Floating-point fused multiply-add to accumulator
+FALSE,vfmas_lane_f32,"a: f32, b: f32, v: float32x2_t, lane: const int",f32,Floating-point fused multiply-add to accumulator
+FALSE,vfmas_laneq_f32,"a: f32, b: f32, v: float32x4_t, lane: const int",f32,Floating-point fused multiply-add to accumulator
+FALSE,vfmlal_high_f16,"r: float32x2_t, a: float16x4_t, b: float16x4_t",float32x2_t,Floating-point fused multiply-add long to accumulator
+FALSE,vfmlal_lane_high_f16,"r: float32x2_t, a: float16x4_t, b: float16x4_t, lane: const int",float32x2_t,Floating-point fused multiply-add long to accumulator
+FALSE,vfmlal_lane_low_f16,"r: float32x2_t, a: float16x4_t, b: float16x4_t, lane: const int",float32x2_t,Floating-point fused multiply-add long to accumulator
+FALSE,vfmlal_laneq_high_f16,"r: float32x2_t, a: float16x4_t, b: float16x8_t, lane: const int",float32x2_t,Floating-point fused multiply-add long to accumulator
+FALSE,vfmlal_laneq_low_f16,"r: float32x2_t, a: float16x4_t, b: float16x8_t, lane: const int",float32x2_t,Floating-point fused multiply-add long to accumulator
+FALSE,vfmlal_low_f16,"r: float32x2_t, a: float16x4_t, b: float16x4_t",float32x2_t,Floating-point fused multiply-add long to accumulator
+FALSE,vfmlalq_high_f16,"r: float32x4_t, a: float16x8_t, b: float16x8_t",float32x4_t,Floating-point fused multiply-add long to accumulator
+FALSE,vfmlalq_lane_high_f16,"r: float32x4_t, a: float16x8_t, b: float16x4_t, lane: const int",float32x4_t,Floating-point fused multiply-add long to accumulator
+FALSE,vfmlalq_lane_low_f16,"r: float32x4_t, a: float16x8_t, b: float16x4_t, lane: const int",float32x4_t,Floating-point fused multiply-add long to accumulator
+FALSE,vfmlalq_laneq_high_f16,"r: float32x4_t, a: float16x8_t, b: float16x8_t, lane: const int",float32x4_t,Floating-point fused multiply-add long to accumulator
+FALSE,vfmlalq_laneq_low_f16,"r: float32x4_t, a: float16x8_t, b: float16x8_t, lane: const int",float32x4_t,Floating-point fused multiply-add long to accumulator
+FALSE,vfmlalq_low_f16,"r: float32x4_t, a: float16x8_t, b: float16x8_t",float32x4_t,Floating-point fused multiply-add long to accumulator
+FALSE,vfmlsl_high_f16,"r: float32x2_t, a: float16x4_t, b: float16x4_t",float32x2_t,Floating-point fused multiply-subtract long from accumulator
+FALSE,vfmlsl_lane_high_f16,"r: float32x2_t, a: float16x4_t, b: float16x4_t, lane: const int",float32x2_t,Floating-point fused multiply-subtract long from accumulator
+FALSE,vfmlsl_lane_low_f16,"r: float32x2_t, a: float16x4_t, b: float16x4_t, lane: const int",float32x2_t,Floating-point fused multiply-subtract long from accumulator
+FALSE,vfmlsl_laneq_high_f16,"r: float32x2_t, a: float16x4_t, b: float16x8_t, lane: const int",float32x2_t,Floating-point fused multiply-subtract long from accumulator
+FALSE,vfmlsl_laneq_low_f16,"r: float32x2_t, a: float16x4_t, b: float16x8_t, lane: const int",float32x2_t,Floating-point fused multiply-subtract long from accumulator
+FALSE,vfmlsl_low_f16,"r: float32x2_t, a: float16x4_t, b: float16x4_t",float32x2_t,Floating-point fused multiply-subtract long from accumulator
+FALSE,vfmlslq_high_f16,"r: float32x4_t, a: float16x8_t, b: float16x8_t",float32x4_t,Floating-point fused multiply-subtract long from accumulator
+FALSE,vfmlslq_lane_high_f16,"r: float32x4_t, a: float16x8_t, b: float16x4_t, lane: const int",float32x4_t,Floating-point fused multiply-subtract long from accumulator
+FALSE,vfmlslq_lane_low_f16,"r: float32x4_t, a: float16x8_t, b: float16x4_t, lane: const int",float32x4_t,Floating-point fused multiply-subtract long from accumulator
+FALSE,vfmlslq_laneq_high_f16,"r: float32x4_t, a: float16x8_t, b: float16x8_t, lane: const int",float32x4_t,Floating-point fused multiply-subtract long from accumulator
+FALSE,vfmlslq_laneq_low_f16,"r: float32x4_t, a: float16x8_t, b: float16x8_t, lane: const int",float32x4_t,Floating-point fused multiply-subtract long from accumulator
+FALSE,vfmlslq_low_f16,"r: float32x4_t, a: float16x8_t, b: float16x8_t",float32x4_t,Floating-point fused multiply-subtract long from accumulator
+FALSE,vfms_f16,"a: float16x4_t, b: float16x4_t, c: float16x4_t",float16x4_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfms_f32,"a: float32x2_t, b: float32x2_t, c: float32x2_t",float32x2_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfms_f64,"a: float64x1_t, b: float64x1_t, c: float64x1_t",float64x1_t,Floating-point fused multiply-subtract
+FALSE,vfms_lane_f16,"a: float16x4_t, b: float16x4_t, v: float16x4_t, lane: const int",float16x4_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfms_lane_f32,"a: float32x2_t, b: float32x2_t, v: float32x2_t, lane: const int",float32x2_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfms_lane_f64,"a: float64x1_t, b: float64x1_t, v: float64x1_t, lane: const int",float64x1_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfms_laneq_f16,"a: float16x4_t, b: float16x4_t, v: float16x8_t, lane: const int",float16x4_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfms_laneq_f32,"a: float32x2_t, b: float32x2_t, v: float32x4_t, lane: const int",float32x2_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfms_laneq_f64,"a: float64x1_t, b: float64x1_t, v: float64x2_t, lane: const int",float64x1_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfms_n_f16,"a: float16x4_t, b: float16x4_t, n: float16_t",float16x4_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfms_n_f32,"a: float32x2_t, b: float32x2_t, n: f32",float32x2_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfms_n_f64,"a: float64x1_t, b: float64x1_t, n: float64_t",float64x1_t,Floating-point fused multiply-subtract
+FALSE,vfmsd_lane_f64,"a: float64_t, b: float64_t, v: float64x1_t, lane: const int",float64_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfmsd_laneq_f64,"a: float64_t, b: float64_t, v: float64x2_t, lane: const int",float64_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfmsh_f16,"a: float16_t, b: float16_t, c: float16_t",float16_t,Floating-point fused multiply-subtract
+FALSE,vfmsh_lane_f16,"a: float16_t, b: float16_t, v: float16x4_t, lane: const int",float16_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfmsh_laneq_f16,"a: float16_t, b: float16_t, v: float16x8_t, lane: const int",float16_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfmsq_f16,"a: float16x8_t, b: float16x8_t, c: float16x8_t",float16x8_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfmsq_f32,"a: float32x4_t, b: float32x4_t, c: float32x4_t",float32x4_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfmsq_f64,"a: float64x2_t, b: float64x2_t, c: float64x2_t",float64x2_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfmsq_lane_f16,"a: float16x8_t, b: float16x8_t, v: float16x4_t, lane: const int",float16x8_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfmsq_lane_f32,"a: float32x4_t, b: float32x4_t, v: float32x2_t, lane: const int",float32x4_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfmsq_lane_f64,"a: float64x2_t, b: float64x2_t, v: float64x1_t, lane: const int",float64x2_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfmsq_laneq_f16,"a: float16x8_t, b: float16x8_t, v: float16x8_t, lane: const int",float16x8_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfmsq_laneq_f32,"a: float32x4_t, b: float32x4_t, v: float32x4_t, lane: const int",float32x4_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfmsq_laneq_f64,"a: float64x2_t, b: float64x2_t, v: float64x2_t, lane: const int",float64x2_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfmsq_n_f16,"a: float16x8_t, b: float16x8_t, n: float16_t",float16x8_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfmsq_n_f32,"a: float32x4_t, b: float32x4_t, n: f32",float32x4_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfmsq_n_f64,"a: float64x2_t, b: float64x2_t, n: float64_t",float64x2_t,Floating-point fused multiply-subtract from accumulator
+FALSE,vfmss_lane_f32,"a: f32, b: f32, v: float32x2_t, lane: const int",f32,Floating-point fused multiply-subtract from accumulator
+FALSE,vfmss_laneq_f32,"a: f32, b: f32, v: float32x4_t, lane: const int",f32,Floating-point fused multiply-subtract from accumulator
+FALSE,vget_high_bf16,a: bfloat16x8_t,bfloat16x4_t,Duplicate vector element to vector or scalar
+FALSE,vget_high_f16,a: float16x8_t,float16x4_t,Duplicate vector element to vector or scalar
+TRUE,vget_high_f32,a: float32x4_t,float32x2_t,Duplicate vector element to vector or scalar
+TRUE,vget_high_f64,a: float64x2_t,float64x1_t,Duplicate vector element to vector or scalar
+TRUE,vget_high_p16,a: poly16x8_t,poly16x4_t,Duplicate vector element to vector or scalar
+TRUE,vget_high_p64,a: poly64x2_t,poly64x1_t,Duplicate vector element to vector or scalar
+TRUE,vget_high_p8,a: poly8x16_t,poly8x8_t,Duplicate vector element to vector or scalar
+TRUE,vget_high_s16,a: int16x8_t,int16x4_t,Duplicate vector element to vector or scalar
+TRUE,vget_high_s32,a: int32x4_t,int32x2_t,Duplicate vector element to vector or scalar
+TRUE,vget_high_s64,a: int64x2_t,int64x1_t,Duplicate vector element to vector or scalar
+TRUE,vget_high_s8,a: int8x16_t,int8x8_t,Duplicate vector element to vector or scalar
+TRUE,vget_high_u16,a: uint16x8_t,uint16x4_t,Duplicate vector element to vector or scalar
+TRUE,vget_high_u32,a: uint32x4_t,uint32x2_t,Duplicate vector element to vector or scalar
+TRUE,vget_high_u64,a: uint64x2_t,uint64x1_t,Duplicate vector element to vector or scalar
+TRUE,vget_high_u8,a: uint8x16_t,uint8x8_t,Duplicate vector element to vector or scalar
+FALSE,vget_lane_bf16,"v: bfloat16x4_t, lane: const int",bfloat16_t,Duplicate vector element to vector or scalar
+FALSE,vget_lane_f16,"v: float16x4_t, lane: const int",float16_t,Duplicate vector element to vector or scalar
+FALSE,vget_lane_f32,"v: float32x2_t, lane: const int",f32,Duplicate vector element to vector or scalar
+FALSE,vget_lane_f64,"v: float64x1_t, lane: const int",float64_t,Duplicate vector element to vector or scalar
+FALSE,vget_lane_p16,"v: poly16x4_t, lane: const int",poly16_t,Unsigned move vector element to general-purpose register
+FALSE,vget_lane_p64,"v: poly64x1_t, lane: const int",poly64_t,Unsigned move vector element to general-purpose register
+FALSE,vget_lane_p8,"v: poly8x8_t, lane: const int",poly8_t,Unsigned move vector element to general-purpose register
+FALSE,vget_lane_s16,"v: int16x4_t, lane: const int",i16,Signed move vector element to general-purpose register
+FALSE,vget_lane_s32,"v: int32x2_t, lane: const int",i32,Signed move vector element to general-purpose register
+FALSE,vget_lane_s64,"v: int64x1_t, lane: const int",i64,Unsigned move vector element to general-purpose register
+FALSE,vget_lane_s8,"v: int8x8_t, lane: const int",i8,Signed move vector element to general-purpose register
+FALSE,vget_lane_u16,"v: uint16x4_t, lane: const int",u16,Unsigned move vector element to general-purpose register
+FALSE,vget_lane_u32,"v: uint32x2_t, lane: const int",u32,Unsigned move vector element to general-purpose register
+TRUE,vget_lane_u64,"v: uint64x1_t, lane: const int",u64,Unsigned move vector element to general-purpose register
+TRUE,vget_lane_u8,"v: uint8x8_t, lane: const int",u8,Unsigned move vector element to general-purpose register
+FALSE,vget_low_bf16,a: bfloat16x8_t,bfloat16x4_t,Duplicate vector element to vector or scalar
+FALSE,vget_low_f16,a: float16x8_t,float16x4_t,Duplicate vector element to vector or scalar
+TRUE,vget_low_f32,a: float32x4_t,float32x2_t,Duplicate vector element to vector or scalar
+TRUE,vget_low_f64,a: float64x2_t,float64x1_t,Duplicate vector element to vector or scalar
+TRUE,vget_low_p16,a: poly16x8_t,poly16x4_t,Duplicate vector element to vector or scalar
+TRUE,vget_low_p64,a: poly64x2_t,poly64x1_t,Duplicate vector element to vector or scalar
+TRUE,vget_low_p8,a: poly8x16_t,poly8x8_t,Duplicate vector element to vector or scalar
+TRUE,vget_low_s16,a: int16x8_t,int16x4_t,Duplicate vector element to vector or scalar
+TRUE,vget_low_s32,a: int32x4_t,int32x2_t,Duplicate vector element to vector or scalar
+TRUE,vget_low_s64,a: int64x2_t,int64x1_t,Duplicate vector element to vector or scalar
+TRUE,vget_low_s8,a: int8x16_t,int8x8_t,Duplicate vector element to vector or scalar
+TRUE,vget_low_u16,a: uint16x8_t,uint16x4_t,Duplicate vector element to vector or scalar
+TRUE,vget_low_u32,a: uint32x4_t,uint32x2_t,Duplicate vector element to vector or scalar
+TRUE,vget_low_u64,a: uint64x2_t,uint64x1_t,Duplicate vector element to vector or scalar
+TRUE,vget_low_u8,a: uint8x16_t,uint8x8_t,Duplicate vector element to vector or scalar
+FALSE,vgetq_lane_bf16,"v: bfloat16x8_t, lane: const int",bfloat16_t,Duplicate vector element to vector or scalar
+FALSE,vgetq_lane_f16,"v: float16x8_t, lane: const int",float16_t,Duplicate vector element to vector or scalar
+FALSE,vgetq_lane_f32,"v: float32x4_t, lane: const int",f32,Duplicate vector element to vector or scalar
+FALSE,vgetq_lane_f64,"v: float64x2_t, lane: const int",float64_t,Duplicate vector element to vector or scalar
+FALSE,vgetq_lane_p16,"v: poly16x8_t, lane: const int",poly16_t,Unsigned move vector element to general-purpose register
+FALSE,vgetq_lane_p64,"v: poly64x2_t, lane: const int",poly64_t,Unsigned move vector element to general-purpose register
+FALSE,vgetq_lane_p8,"v: poly8x16_t, lane: const int",poly8_t,Unsigned move vector element to general-purpose register
+FALSE,vgetq_lane_s16,"v: int16x8_t, lane: const int",i16,Signed move vector element to general-purpose register
+FALSE,vgetq_lane_s32,"v: int32x4_t, lane: const int",i32,Signed move vector element to general-purpose register
+FALSE,vgetq_lane_s64,"v: int64x2_t, lane: const int",i64,Unsigned move vector element to general-purpose register
+FALSE,vgetq_lane_s8,"v: int8x16_t, lane: const int",i8,Signed move vector element to general-purpose register
+TRUE,vgetq_lane_u16,"v: uint16x8_t, lane: const int",u16,Unsigned move vector element to general-purpose register
+TRUE,vgetq_lane_u32,"v: uint32x4_t, lane: const int",u32,Unsigned move vector element to general-purpose register
+TRUE,vgetq_lane_u64,"v: uint64x2_t, lane: const int",u64,Unsigned move vector element to general-purpose register
+FALSE,vgetq_lane_u8,"v: uint8x16_t, lane: const int",u8,Unsigned move vector element to general-purpose register
+TRUE,vhadd_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Signed halving add
+TRUE,vhadd_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Signed halving add
+TRUE,vhadd_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Signed halving add
+TRUE,vhadd_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Unsigned halving add
+TRUE,vhadd_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Unsigned halving add
+TRUE,vhadd_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Unsigned halving add
+TRUE,vhaddq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Signed halving add
+TRUE,vhaddq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Signed halving add
+TRUE,vhaddq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Signed halving add
+TRUE,vhaddq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Unsigned halving add
+TRUE,vhaddq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Unsigned halving add
+TRUE,vhaddq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Unsigned halving add
+TRUE,vhsub_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Signed halving subtract
+TRUE,vhsub_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Signed halving subtract
+TRUE,vhsub_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Signed halving subtract
+TRUE,vhsub_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Unsigned halving subtract
+TRUE,vhsub_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Unsigned halving subtract
+TRUE,vhsub_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Unsigned halving subtract
+TRUE,vhsubq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Signed halving subtract
+TRUE,vhsubq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Signed halving subtract
+TRUE,vhsubq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Signed halving subtract
+TRUE,vhsubq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Unsigned halving subtract
+TRUE,vhsubq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Unsigned halving subtract
+TRUE,vhsubq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Unsigned halving subtract
+FALSE,vld1_bf16,ptr: *const bfloat16_t,bfloat16x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_bf16_x2,ptr: *const bfloat16_t,bfloat16x4x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_bf16_x3,ptr: *const bfloat16_t,bfloat16x4x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_bf16_x4,ptr: *const bfloat16_t,bfloat16x4x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_dup_bf16,ptr: *const bfloat16_t,bfloat16x4_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1_dup_f16,ptr: *const float16_t,float16x4_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1_dup_f32,ptr: *const f32,float32x2_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1_dup_f64,ptr: *const float64_t,float64x1_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_dup_p16,ptr: *const poly16_t,poly16x4_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1_dup_p64,ptr: *const poly64_t,poly64x1_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_dup_p8,ptr: *const poly8_t,poly8x8_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1_dup_s16,ptr: *const i16,int16x4_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1_dup_s32,ptr: *const i32,int32x2_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1_dup_s64,ptr: *const i64,int64x1_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_dup_s8,ptr: *const i8,int8x8_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1_dup_u16,ptr: *const u16,uint16x4_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1_dup_u32,ptr: *const u32,uint32x2_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1_dup_u64,ptr: *const u64,uint64x1_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_dup_u8,ptr: *const u8,uint8x8_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1_f16,ptr: *const float16_t,float16x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_f16_x2,ptr: *const float16_t,float16x4x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_f16_x3,ptr: *const float16_t,float16x4x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_f16_x4,ptr: *const float16_t,float16x4x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_f32,ptr: *const f32,float32x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_f32_x2,ptr: *const f32,float32x2x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_f32_x3,ptr: *const f32,float32x2x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_f32_x4,ptr: *const f32,float32x2x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_f64,ptr: *const float64_t,float64x1_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_f64_x2,ptr: *const float64_t,float64x1x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_f64_x3,ptr: *const float64_t,float64x1x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_f64_x4,ptr: *const float64_t,float64x1x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_lane_bf16,"ptr: *const bfloat16_t, src: bfloat16x4_t, lane: const int",bfloat16x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_lane_f16,"ptr: *const float16_t, src: float16x4_t, lane: const int",float16x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_lane_f32,"ptr: *const f32, src: float32x2_t, lane: const int",float32x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_lane_f64,"ptr: *const float64_t, src: float64x1_t, lane: const int",float64x1_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_lane_p16,"ptr: *const poly16_t, src: poly16x4_t, lane: const int",poly16x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_lane_p64,"ptr: *const poly64_t, src: poly64x1_t, lane: const int",poly64x1_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_lane_p8,"ptr: *const poly8_t, src: poly8x8_t, lane: const int",poly8x8_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_lane_s16,"ptr: *const i16, src: int16x4_t, lane: const int",int16x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_lane_s32,"ptr: *const i32, src: int32x2_t, lane: const int",int32x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_lane_s64,"ptr: *const i64, src: int64x1_t, lane: const int",int64x1_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_lane_s8,"ptr: *const i8, src: int8x8_t, lane: const int",int8x8_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_lane_u16,"ptr: *const u16, src: uint16x4_t, lane: const int",uint16x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_lane_u32,"ptr: *const u32, src: uint32x2_t, lane: const int",uint32x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_lane_u64,"ptr: *const u64, src: uint64x1_t, lane: const int",uint64x1_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_lane_u8,"ptr: *const u8, src: uint8x8_t, lane: const int",uint8x8_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_p16,ptr: *const poly16_t,poly16x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_p16_x2,ptr: *const poly16_t,poly16x4x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_p16_x3,ptr: *const poly16_t,poly16x4x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_p16_x4,ptr: *const poly16_t,poly16x4x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_p64,ptr: *const poly64_t,poly64x1_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_p64_x2,ptr: *const poly64_t,poly64x1x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_p64_x3,ptr: *const poly64_t,poly64x1x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_p64_x4,ptr: *const poly64_t,poly64x1x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_p8,ptr: *const poly8_t,poly8x8_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_p8_x2,ptr: *const poly8_t,poly8x8x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_p8_x3,ptr: *const poly8_t,poly8x8x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_p8_x4,ptr: *const poly8_t,poly8x8x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_s16,ptr: *const i16,int16x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_s16_x2,ptr: *const i16,int16x4x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_s16_x3,ptr: *const i16,int16x4x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_s16_x4,ptr: *const i16,int16x4x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_s32,ptr: *const i32,int32x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_s32_x2,ptr: *const i32,int32x2x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_s32_x3,ptr: *const i32,int32x2x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_s32_x4,ptr: *const i32,int32x2x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_s64,ptr: *const i64,int64x1_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_s64_x2,ptr: *const i64,int64x1x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_s64_x3,ptr: *const i64,int64x1x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_s64_x4,ptr: *const i64,int64x1x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_s8,ptr: *const i8,int8x8_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_s8_x2,ptr: *const i8,int8x8x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_s8_x3,ptr: *const i8,int8x8x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_s8_x4,ptr: *const i8,int8x8x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_u16,ptr: *const u16,uint16x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_u16_x2,ptr: *const u16,uint16x4x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_u16_x3,ptr: *const u16,uint16x4x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_u16_x4,ptr: *const u16,uint16x4x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_u32,ptr: *const u32,uint32x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_u32_x2,ptr: *const u32,uint32x2x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_u32_x3,ptr: *const u32,uint32x2x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_u32_x4,ptr: *const u32,uint32x2x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_u64,ptr: *const u64,uint64x1_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_u64_x2,ptr: *const u64,uint64x1x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_u64_x3,ptr: *const u64,uint64x1x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_u64_x4,ptr: *const u64,uint64x1x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_u8,ptr: *const u8,uint8x8_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_u8_x2,ptr: *const u8,uint8x8x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_u8_x3,ptr: *const u8,uint8x8x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1_u8_x4,ptr: *const u8,uint8x8x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_bf16,ptr: *const bfloat16_t,bfloat16x8_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_bf16_x2,ptr: *const bfloat16_t,bfloat16x8x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_bf16_x3,ptr: *const bfloat16_t,bfloat16x8x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_bf16_x4,ptr: *const bfloat16_t,bfloat16x8x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_dup_bf16,ptr: *const bfloat16_t,bfloat16x8_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1q_dup_f16,ptr: *const float16_t,float16x8_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1q_dup_f32,ptr: *const f32,float32x4_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1q_dup_f64,ptr: *const float64_t,float64x2_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1q_dup_p16,ptr: *const poly16_t,poly16x8_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1q_dup_p64,ptr: *const poly64_t,poly64x2_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1q_dup_p8,ptr: *const poly8_t,poly8x16_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1q_dup_s16,ptr: *const i16,int16x8_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1q_dup_s32,ptr: *const i32,int32x4_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1q_dup_s64,ptr: *const i64,int64x2_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1q_dup_s8,ptr: *const i8,int8x16_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1q_dup_u16,ptr: *const u16,uint16x8_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1q_dup_u32,ptr: *const u32,uint32x4_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1q_dup_u64,ptr: *const u64,uint64x2_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1q_dup_u8,ptr: *const u8,uint8x16_t,Load one single-element structure and replicate to all lanes (of one register)
+FALSE,vld1q_f16,ptr: *const float16_t,float16x8_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_f16_x2,ptr: *const float16_t,float16x8x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_f16_x3,ptr: *const float16_t,float16x8x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_f16_x4,ptr: *const float16_t,float16x8x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_f32,ptr: *const f32,float32x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_f32_x2,ptr: *const f32,float32x4x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_f32_x3,ptr: *const f32,float32x4x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_f32_x4,ptr: *const f32,float32x4x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_f64,ptr: *const float64_t,float64x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_f64_x2,ptr: *const float64_t,float64x2x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_f64_x3,ptr: *const float64_t,float64x2x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_f64_x4,ptr: *const float64_t,float64x2x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_lane_bf16,"ptr: *const bfloat16_t, src: bfloat16x8_t, lane: const int",bfloat16x8_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_lane_f16,"ptr: *const float16_t, src: float16x8_t, lane: const int",float16x8_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_lane_f32,"ptr: *const f32, src: float32x4_t, lane: const int",float32x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_lane_f64,"ptr: *const float64_t, src: float64x2_t, lane: const int",float64x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_lane_p16,"ptr: *const poly16_t, src: poly16x8_t, lane: const int",poly16x8_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_lane_p64,"ptr: *const poly64_t, src: poly64x2_t, lane: const int",poly64x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_lane_p8,"ptr: *const poly8_t, src: poly8x16_t, lane: const int",poly8x16_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_lane_s16,"ptr: *const i16, src: int16x8_t, lane: const int",int16x8_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_lane_s32,"ptr: *const i32, src: int32x4_t, lane: const int",int32x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_lane_s64,"ptr: *const i64, src: int64x2_t, lane: const int",int64x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_lane_s8,"ptr: *const i8, src: int8x16_t, lane: const int",int8x16_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_lane_u16,"ptr: *const u16, src: uint16x8_t, lane: const int",uint16x8_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_lane_u32,"ptr: *const u32, src: uint32x4_t, lane: const int",uint32x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_lane_u64,"ptr: *const u64, src: uint64x2_t, lane: const int",uint64x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_lane_u8,"ptr: *const u8, src: uint8x16_t, lane: const int",uint8x16_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_p16,ptr: *const poly16_t,poly16x8_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_p16_x2,ptr: *const poly16_t,poly16x8x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_p16_x3,ptr: *const poly16_t,poly16x8x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_p16_x4,ptr: *const poly16_t,poly16x8x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_p64,ptr: *const poly64_t,poly64x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_p64_x2,ptr: *const poly64_t,poly64x2x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_p64_x3,ptr: *const poly64_t,poly64x2x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_p64_x4,ptr: *const poly64_t,poly64x2x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_p8,ptr: *const poly8_t,poly8x16_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_p8_x2,ptr: *const poly8_t,poly8x16x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_p8_x3,ptr: *const poly8_t,poly8x16x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_p8_x4,ptr: *const poly8_t,poly8x16x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_s16,ptr: *const i16,int16x8_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_s16_x2,ptr: *const i16,int16x8x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_s16_x3,ptr: *const i16,int16x8x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_s16_x4,ptr: *const i16,int16x8x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_s32,ptr: *const i32,int32x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_s32_x2,ptr: *const i32,int32x4x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_s32_x3,ptr: *const i32,int32x4x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_s32_x4,ptr: *const i32,int32x4x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_s64,ptr: *const i64,int64x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_s64_x2,ptr: *const i64,int64x2x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_s64_x3,ptr: *const i64,int64x2x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_s64_x4,ptr: *const i64,int64x2x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+TRUE,vld1q_s8,ptr: *const i8,int8x16_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_s8_x2,ptr: *const i8,int8x16x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_s8_x3,ptr: *const i8,int8x16x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_s8_x4,ptr: *const i8,int8x16x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_u16,ptr: *const u16,uint16x8_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_u16_x2,ptr: *const u16,uint16x8x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_u16_x3,ptr: *const u16,uint16x8x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_u16_x4,ptr: *const u16,uint16x8x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_u32,ptr: *const u32,uint32x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_u32_x2,ptr: *const u32,uint32x4x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_u32_x3,ptr: *const u32,uint32x4x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_u32_x4,ptr: *const u32,uint32x4x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_u64,ptr: *const u64,uint64x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_u64_x2,ptr: *const u64,uint64x2x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_u64_x3,ptr: *const u64,uint64x2x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_u64_x4,ptr: *const u64,uint64x2x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+TRUE,vld1q_u8,ptr: *const u8,uint8x16_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_u8_x2,ptr: *const u8,uint8x16x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_u8_x3,ptr: *const u8,uint8x16x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld1q_u8_x4,ptr: *const u8,uint8x16x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld2_bf16,ptr: *const bfloat16_t,bfloat16x4x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_dup_bf16,ptr: *const bfloat16_t,bfloat16x4x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2_dup_f16,ptr: *const float16_t,float16x4x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2_dup_f32,ptr: *const f32,float32x2x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2_dup_f64,ptr: *const float64_t,float64x1x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2_dup_p16,ptr: *const poly16_t,poly16x4x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2_dup_p64,ptr: *const poly64_t,poly64x1x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2_dup_p8,ptr: *const poly8_t,poly8x8x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2_dup_s16,ptr: *const i16,int16x4x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2_dup_s32,ptr: *const i32,int32x2x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2_dup_s64,ptr: *const i64,int64x1x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2_dup_s8,ptr: *const i8,int8x8x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2_dup_u16,ptr: *const u16,uint16x4x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2_dup_u32,ptr: *const u32,uint32x2x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2_dup_u64,ptr: *const u64,uint64x1x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2_dup_u8,ptr: *const u8,uint8x8x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2_f16,ptr: *const float16_t,float16x4x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_f32,ptr: *const f32,float32x2x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_f64,ptr: *const float64_t,float64x1x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld2_lane_bf16,"ptr: *const bfloat16_t, src: bfloat16x4x2_t, lane: const int",bfloat16x4x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_lane_f16,"ptr: *const float16_t, src: float16x4x2_t, lane: const int",float16x4x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_lane_f32,"ptr: *const f32, src: float32x2x2_t, lane: const int",float32x2x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_lane_f64,"ptr: *const float64_t, src: float64x1x2_t, lane: const int",float64x1x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_lane_p16,"ptr: *const poly16_t, src: poly16x4x2_t, lane: const int",poly16x4x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_lane_p64,"ptr: *const poly64_t, src: poly64x1x2_t, lane: const int",poly64x1x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_lane_p8,"ptr: *const poly8_t, src: poly8x8x2_t, lane: const int",poly8x8x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_lane_s16,"ptr: *const i16, src: int16x4x2_t, lane: const int",int16x4x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_lane_s32,"ptr: *const i32, src: int32x2x2_t, lane: const int",int32x2x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_lane_s64,"ptr: *const i64, src: int64x1x2_t, lane: const int",int64x1x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_lane_s8,"ptr: *const i8, src: int8x8x2_t, lane: const int",int8x8x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_lane_u16,"ptr: *const u16, src: uint16x4x2_t, lane: const int",uint16x4x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_lane_u32,"ptr: *const u32, src: uint32x2x2_t, lane: const int",uint32x2x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_lane_u64,"ptr: *const u64, src: uint64x1x2_t, lane: const int",uint64x1x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_lane_u8,"ptr: *const u8, src: uint8x8x2_t, lane: const int",uint8x8x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_p16,ptr: *const poly16_t,poly16x4x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_p64,ptr: *const poly64_t,poly64x1x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld2_p8,ptr: *const poly8_t,poly8x8x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_s16,ptr: *const i16,int16x4x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_s32,ptr: *const i32,int32x2x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_s64,ptr: *const i64,int64x1x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld2_s8,ptr: *const i8,int8x8x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_u16,ptr: *const u16,uint16x4x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_u32,ptr: *const u32,uint32x2x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2_u64,ptr: *const u64,uint64x1x2_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld2_u8,ptr: *const u8,uint8x8x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_bf16,ptr: *const bfloat16_t,bfloat16x8x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_dup_bf16,ptr: *const bfloat16_t,bfloat16x8x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2q_dup_f16,ptr: *const float16_t,float16x8x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2q_dup_f32,ptr: *const f32,float32x4x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2q_dup_f64,ptr: *const float64_t,float64x2x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2q_dup_p16,ptr: *const poly16_t,poly16x8x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2q_dup_p64,ptr: *const poly64_t,poly64x2x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2q_dup_p8,ptr: *const poly8_t,poly8x16x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2q_dup_s16,ptr: *const i16,int16x8x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2q_dup_s32,ptr: *const i32,int32x4x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2q_dup_s64,ptr: *const i64,int64x2x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2q_dup_s8,ptr: *const i8,int8x16x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2q_dup_u16,ptr: *const u16,uint16x8x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2q_dup_u32,ptr: *const u32,uint32x4x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2q_dup_u64,ptr: *const u64,uint64x2x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2q_dup_u8,ptr: *const u8,uint8x16x2_t,Load single 2-element structure and replicate to all lanes of two registers
+FALSE,vld2q_f16,ptr: *const float16_t,float16x8x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_f32,ptr: *const f32,float32x4x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_f64,ptr: *const float64_t,float64x2x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_lane_bf16,"ptr: *const bfloat16_t, src: bfloat16x8x2_t, lane: const int",bfloat16x8x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_lane_f16,"ptr: *const float16_t, src: float16x8x2_t, lane: const int",float16x8x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_lane_f32,"ptr: *const f32, src: float32x4x2_t, lane: const int",float32x4x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_lane_f64,"ptr: *const float64_t, src: float64x2x2_t, lane: const int",float64x2x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_lane_p16,"ptr: *const poly16_t, src: poly16x8x2_t, lane: const int",poly16x8x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_lane_p64,"ptr: *const poly64_t, src: poly64x2x2_t, lane: const int",poly64x2x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_lane_p8,"ptr: *const poly8_t, src: poly8x16x2_t, lane: const int",poly8x16x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_lane_s16,"ptr: *const i16, src: int16x8x2_t, lane: const int",int16x8x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_lane_s32,"ptr: *const i32, src: int32x4x2_t, lane: const int",int32x4x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_lane_s64,"ptr: *const i64, src: int64x2x2_t, lane: const int",int64x2x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_lane_s8,"ptr: *const i8, src: int8x16x2_t, lane: const int",int8x16x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_lane_u16,"ptr: *const u16, src: uint16x8x2_t, lane: const int",uint16x8x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_lane_u32,"ptr: *const u32, src: uint32x4x2_t, lane: const int",uint32x4x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_lane_u64,"ptr: *const u64, src: uint64x2x2_t, lane: const int",uint64x2x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_lane_u8,"ptr: *const u8, src: uint8x16x2_t, lane: const int",uint8x16x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_p16,ptr: *const poly16_t,poly16x8x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_p64,ptr: *const poly64_t,poly64x2x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_p8,ptr: *const poly8_t,poly8x16x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_s16,ptr: *const i16,int16x8x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_s32,ptr: *const i32,int32x4x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_s64,ptr: *const i64,int64x2x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_s8,ptr: *const i8,int8x16x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_u16,ptr: *const u16,uint16x8x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_u32,ptr: *const u32,uint32x4x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_u64,ptr: *const u64,uint64x2x2_t,Load multiple 2-element structures to two registers
+FALSE,vld2q_u8,ptr: *const u8,uint8x16x2_t,Load multiple 2-element structures to two registers
+FALSE,vld3_bf16,ptr: *const bfloat16_t,bfloat16x4x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_dup_bf16,ptr: *const bfloat16_t,bfloat16x4x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3_dup_f16,ptr: *const float16_t,float16x4x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3_dup_f32,ptr: *const f32,float32x2x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3_dup_f64,ptr: *const float64_t,float64x1x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3_dup_p16,ptr: *const poly16_t,poly16x4x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3_dup_p64,ptr: *const poly64_t,poly64x1x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3_dup_p8,ptr: *const poly8_t,poly8x8x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3_dup_s16,ptr: *const i16,int16x4x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3_dup_s32,ptr: *const i32,int32x2x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3_dup_s64,ptr: *const i64,int64x1x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3_dup_s8,ptr: *const i8,int8x8x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3_dup_u16,ptr: *const u16,uint16x4x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3_dup_u32,ptr: *const u32,uint32x2x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3_dup_u64,ptr: *const u64,uint64x1x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3_dup_u8,ptr: *const u8,uint8x8x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3_f16,ptr: *const float16_t,float16x4x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_f32,ptr: *const f32,float32x2x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_f64,ptr: *const float64_t,float64x1x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld3_lane_bf16,"ptr: *const bfloat16_t, src: bfloat16x4x3_t, lane: const int",bfloat16x4x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_lane_f16,"ptr: *const float16_t, src: float16x4x3_t, lane: const int",float16x4x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_lane_f32,"ptr: *const f32, src: float32x2x3_t, lane: const int",float32x2x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_lane_f64,"ptr: *const float64_t, src: float64x1x3_t, lane: const int",float64x1x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_lane_p16,"ptr: *const poly16_t, src: poly16x4x3_t, lane: const int",poly16x4x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_lane_p64,"ptr: *const poly64_t, src: poly64x1x3_t, lane: const int",poly64x1x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_lane_p8,"ptr: *const poly8_t, src: poly8x8x3_t, lane: const int",poly8x8x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_lane_s16,"ptr: *const i16, src: int16x4x3_t, lane: const int",int16x4x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_lane_s32,"ptr: *const i32, src: int32x2x3_t, lane: const int",int32x2x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_lane_s64,"ptr: *const i64, src: int64x1x3_t, lane: const int",int64x1x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_lane_s8,"ptr: *const i8, src: int8x8x3_t, lane: const int",int8x8x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_lane_u16,"ptr: *const u16, src: uint16x4x3_t, lane: const int",uint16x4x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_lane_u32,"ptr: *const u32, src: uint32x2x3_t, lane: const int",uint32x2x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_lane_u64,"ptr: *const u64, src: uint64x1x3_t, lane: const int",uint64x1x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_lane_u8,"ptr: *const u8, src: uint8x8x3_t, lane: const int",uint8x8x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_p16,ptr: *const poly16_t,poly16x4x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_p64,ptr: *const poly64_t,poly64x1x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld3_p8,ptr: *const poly8_t,poly8x8x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_s16,ptr: *const i16,int16x4x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_s32,ptr: *const i32,int32x2x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_s64,ptr: *const i64,int64x1x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld3_s8,ptr: *const i8,int8x8x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_u16,ptr: *const u16,uint16x4x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_u32,ptr: *const u32,uint32x2x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3_u64,ptr: *const u64,uint64x1x3_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld3_u8,ptr: *const u8,uint8x8x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_bf16,ptr: *const bfloat16_t,bfloat16x8x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_dup_bf16,ptr: *const bfloat16_t,bfloat16x8x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3q_dup_f16,ptr: *const float16_t,float16x8x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3q_dup_f32,ptr: *const f32,float32x4x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3q_dup_f64,ptr: *const float64_t,float64x2x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3q_dup_p16,ptr: *const poly16_t,poly16x8x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3q_dup_p64,ptr: *const poly64_t,poly64x2x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3q_dup_p8,ptr: *const poly8_t,poly8x16x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3q_dup_s16,ptr: *const i16,int16x8x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3q_dup_s32,ptr: *const i32,int32x4x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3q_dup_s64,ptr: *const i64,int64x2x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3q_dup_s8,ptr: *const i8,int8x16x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3q_dup_u16,ptr: *const u16,uint16x8x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3q_dup_u32,ptr: *const u32,uint32x4x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3q_dup_u64,ptr: *const u64,uint64x2x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3q_dup_u8,ptr: *const u8,uint8x16x3_t,Load single 3-element structure and replicate to all lanes of three registers
+FALSE,vld3q_f16,ptr: *const float16_t,float16x8x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_f32,ptr: *const f32,float32x4x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_f64,ptr: *const float64_t,float64x2x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_lane_bf16,"ptr: *const bfloat16_t, src: bfloat16x8x3_t, lane: const int",bfloat16x8x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_lane_f16,"ptr: *const float16_t, src: float16x8x3_t, lane: const int",float16x8x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_lane_f32,"ptr: *const f32, src: float32x4x3_t, lane: const int",float32x4x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_lane_f64,"ptr: *const float64_t, src: float64x2x3_t, lane: const int",float64x2x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_lane_p16,"ptr: *const poly16_t, src: poly16x8x3_t, lane: const int",poly16x8x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_lane_p64,"ptr: *const poly64_t, src: poly64x2x3_t, lane: const int",poly64x2x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_lane_p8,"ptr: *const poly8_t, src: poly8x16x3_t, lane: const int",poly8x16x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_lane_s16,"ptr: *const i16, src: int16x8x3_t, lane: const int",int16x8x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_lane_s32,"ptr: *const i32, src: int32x4x3_t, lane: const int",int32x4x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_lane_s64,"ptr: *const i64, src: int64x2x3_t, lane: const int",int64x2x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_lane_s8,"ptr: *const i8, src: int8x16x3_t, lane: const int",int8x16x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_lane_u16,"ptr: *const u16, src: uint16x8x3_t, lane: const int",uint16x8x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_lane_u32,"ptr: *const u32, src: uint32x4x3_t, lane: const int",uint32x4x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_lane_u64,"ptr: *const u64, src: uint64x2x3_t, lane: const int",uint64x2x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_lane_u8,"ptr: *const u8, src: uint8x16x3_t, lane: const int",uint8x16x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_p16,ptr: *const poly16_t,poly16x8x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_p64,ptr: *const poly64_t,poly64x2x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_p8,ptr: *const poly8_t,poly8x16x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_s16,ptr: *const i16,int16x8x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_s32,ptr: *const i32,int32x4x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_s64,ptr: *const i64,int64x2x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_s8,ptr: *const i8,int8x16x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_u16,ptr: *const u16,uint16x8x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_u32,ptr: *const u32,uint32x4x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_u64,ptr: *const u64,uint64x2x3_t,Load multiple 3-element structures to three registers
+FALSE,vld3q_u8,ptr: *const u8,uint8x16x3_t,Load multiple 3-element structures to three registers
+FALSE,vld4_bf16,ptr: *const bfloat16_t,bfloat16x4x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_dup_bf16,ptr: *const bfloat16_t,bfloat16x4x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4_dup_f16,ptr: *const float16_t,float16x4x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4_dup_f32,ptr: *const f32,float32x2x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4_dup_f64,ptr: *const float64_t,float64x1x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4_dup_p16,ptr: *const poly16_t,poly16x4x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4_dup_p64,ptr: *const poly64_t,poly64x1x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4_dup_p8,ptr: *const poly8_t,poly8x8x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4_dup_s16,ptr: *const i16,int16x4x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4_dup_s32,ptr: *const i32,int32x2x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4_dup_s64,ptr: *const i64,int64x1x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4_dup_s8,ptr: *const i8,int8x8x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4_dup_u16,ptr: *const u16,uint16x4x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4_dup_u32,ptr: *const u32,uint32x2x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4_dup_u64,ptr: *const u64,uint64x1x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4_dup_u8,ptr: *const u8,uint8x8x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4_f16,ptr: *const float16_t,float16x4x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_f32,ptr: *const f32,float32x2x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_f64,ptr: *const float64_t,float64x1x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld4_lane_bf16,"ptr: *const bfloat16_t, src: bfloat16x4x4_t, lane: const int",bfloat16x4x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_lane_f16,"ptr: *const float16_t, src: float16x4x4_t, lane: const int",float16x4x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_lane_f32,"ptr: *const f32, src: float32x2x4_t, lane: const int",float32x2x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_lane_f64,"ptr: *const float64_t, src: float64x1x4_t, lane: const int",float64x1x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_lane_p16,"ptr: *const poly16_t, src: poly16x4x4_t, lane: const int",poly16x4x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_lane_p64,"ptr: *const poly64_t, src: poly64x1x4_t, lane: const int",poly64x1x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_lane_p8,"ptr: *const poly8_t, src: poly8x8x4_t, lane: const int",poly8x8x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_lane_s16,"ptr: *const i16, src: int16x4x4_t, lane: const int",int16x4x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_lane_s32,"ptr: *const i32, src: int32x2x4_t, lane: const int",int32x2x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_lane_s64,"ptr: *const i64, src: int64x1x4_t, lane: const int",int64x1x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_lane_s8,"ptr: *const i8, src: int8x8x4_t, lane: const int",int8x8x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_lane_u16,"ptr: *const u16, src: uint16x4x4_t, lane: const int",uint16x4x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_lane_u32,"ptr: *const u32, src: uint32x2x4_t, lane: const int",uint32x2x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_lane_u64,"ptr: *const u64, src: uint64x1x4_t, lane: const int",uint64x1x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_lane_u8,"ptr: *const u8, src: uint8x8x4_t, lane: const int",uint8x8x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_p16,ptr: *const poly16_t,poly16x4x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_p64,ptr: *const poly64_t,poly64x1x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld4_p8,ptr: *const poly8_t,poly8x8x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_s16,ptr: *const i16,int16x4x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_s32,ptr: *const i32,int32x2x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_s64,ptr: *const i64,int64x1x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld4_s8,ptr: *const i8,int8x8x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_u16,ptr: *const u16,uint16x4x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_u32,ptr: *const u32,uint32x2x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4_u64,ptr: *const u64,uint64x1x4_t,"Load multiple single-element structures to one, two, three, or four registers"
+FALSE,vld4_u8,ptr: *const u8,uint8x8x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_bf16,ptr: *const bfloat16_t,bfloat16x8x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_dup_bf16,ptr: *const bfloat16_t,bfloat16x8x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4q_dup_f16,ptr: *const float16_t,float16x8x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4q_dup_f32,ptr: *const f32,float32x4x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4q_dup_f64,ptr: *const float64_t,float64x2x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4q_dup_p16,ptr: *const poly16_t,poly16x8x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4q_dup_p64,ptr: *const poly64_t,poly64x2x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4q_dup_p8,ptr: *const poly8_t,poly8x16x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4q_dup_s16,ptr: *const i16,int16x8x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4q_dup_s32,ptr: *const i32,int32x4x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4q_dup_s64,ptr: *const i64,int64x2x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4q_dup_s8,ptr: *const i8,int8x16x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4q_dup_u16,ptr: *const u16,uint16x8x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4q_dup_u32,ptr: *const u32,uint32x4x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4q_dup_u64,ptr: *const u64,uint64x2x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4q_dup_u8,ptr: *const u8,uint8x16x4_t,Load single 4-element structure and replicate to all lanes of four registers
+FALSE,vld4q_f16,ptr: *const float16_t,float16x8x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_f32,ptr: *const f32,float32x4x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_f64,ptr: *const float64_t,float64x2x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_lane_bf16,"ptr: *const bfloat16_t, src: bfloat16x8x4_t, lane: const int",bfloat16x8x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_lane_f16,"ptr: *const float16_t, src: float16x8x4_t, lane: const int",float16x8x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_lane_f32,"ptr: *const f32, src: float32x4x4_t, lane: const int",float32x4x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_lane_f64,"ptr: *const float64_t, src: float64x2x4_t, lane: const int",float64x2x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_lane_p16,"ptr: *const poly16_t, src: poly16x8x4_t, lane: const int",poly16x8x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_lane_p64,"ptr: *const poly64_t, src: poly64x2x4_t, lane: const int",poly64x2x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_lane_p8,"ptr: *const poly8_t, src: poly8x16x4_t, lane: const int",poly8x16x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_lane_s16,"ptr: *const i16, src: int16x8x4_t, lane: const int",int16x8x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_lane_s32,"ptr: *const i32, src: int32x4x4_t, lane: const int",int32x4x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_lane_s64,"ptr: *const i64, src: int64x2x4_t, lane: const int",int64x2x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_lane_s8,"ptr: *const i8, src: int8x16x4_t, lane: const int",int8x16x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_lane_u16,"ptr: *const u16, src: uint16x8x4_t, lane: const int",uint16x8x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_lane_u32,"ptr: *const u32, src: uint32x4x4_t, lane: const int",uint32x4x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_lane_u64,"ptr: *const u64, src: uint64x2x4_t, lane: const int",uint64x2x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_lane_u8,"ptr: *const u8, src: uint8x16x4_t, lane: const int",uint8x16x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_p16,ptr: *const poly16_t,poly16x8x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_p64,ptr: *const poly64_t,poly64x2x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_p8,ptr: *const poly8_t,poly8x16x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_s16,ptr: *const i16,int16x8x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_s32,ptr: *const i32,int32x4x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_s64,ptr: *const i64,int64x2x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_s8,ptr: *const i8,int8x16x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_u16,ptr: *const u16,uint16x8x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_u32,ptr: *const u32,uint32x4x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_u64,ptr: *const u64,uint64x2x4_t,Load multiple 4-element structures to four registers
+FALSE,vld4q_u8,ptr: *const u8,uint8x16x4_t,Load multiple 4-element structures to four registers
+FALSE,vldrq_p128,ptr: *const poly128_t,poly128_t,Load SIMD&FP register (immediate offset)
+FALSE,vmax_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point maximum
+TRUE,vmax_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point maximum
+TRUE,vmax_f64,"a: float64x1_t, b: float64x1_t",float64x1_t,Floating-point maximum
+TRUE,vmax_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Signed maximum
+TRUE,vmax_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Signed maximum
+TRUE,vmax_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Signed maximum
+TRUE,vmax_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Unsigned maximum
+TRUE,vmax_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Unsigned maximum
+TRUE,vmax_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Unsigned maximum
+FALSE,vmaxh_f16,"a: float16_t, b: float16_t",float16_t,Floating-point maximum
+FALSE,vmaxnm_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point maximum number
+TRUE,vmaxnm_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point maximum number
+TRUE,vmaxnm_f64,"a: float64x1_t, b: float64x1_t",float64x1_t,Floating-point maximum number
+FALSE,vmaxnmh_f16,"a: float16_t, b: float16_t",float16_t,Floating-point maximum number
+FALSE,vmaxnmq_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point maximum number
+TRUE,vmaxnmq_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point maximum number
+TRUE,vmaxnmq_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point maximum number
+FALSE,vmaxnmv_f16,a: float16x4_t,float16_t,Floating-point maximum number pairwise
+FALSE,vmaxnmv_f32,a: float32x2_t,f32,Floating-point maximum number pairwise
+FALSE,vmaxnmvq_f16,a: float16x8_t,float16_t,Floating-point maximum number pairwise
+FALSE,vmaxnmvq_f32,a: float32x4_t,f32,Floating-point maximum number across vector
+FALSE,vmaxnmvq_f64,a: float64x2_t,float64_t,Floating-point maximum number pairwise
+FALSE,vmaxq_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point maximum
+TRUE,vmaxq_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point maximum
+TRUE,vmaxq_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point maximum
+TRUE,vmaxq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Signed maximum
+TRUE,vmaxq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Signed maximum
+TRUE,vmaxq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Signed maximum
+TRUE,vmaxq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Unsigned maximum
+TRUE,vmaxq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Unsigned maximum
+TRUE,vmaxq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Unsigned maximum
+FALSE,vmaxv_f16,a: float16x4_t,float16_t,Floating-point maximum pairwise
+TRUE,vmaxv_f32,a: float32x2_t,f32,Floating-point maximum pairwise
+TRUE,vmaxv_s16,a: int16x4_t,i16,Signed maximum across vector
+TRUE,vmaxv_s32,a: int32x2_t,i32,Signed maximum pairwise
+TRUE,vmaxv_s8,a: int8x8_t,i8,Signed maximum across vector
+TRUE,vmaxv_u16,a: uint16x4_t,u16,Unsigned maximum across vector
+TRUE,vmaxv_u32,a: uint32x2_t,u32,Unsigned maximum pairwise
+TRUE,vmaxv_u8,a: uint8x8_t,u8,Unsigned maximum across vector
+FALSE,vmaxvq_f16,a: float16x8_t,float16_t,Floating-point maximum pairwise
+TRUE,vmaxvq_f32,a: float32x4_t,f32,Floating-point maximum across vector
+TRUE,vmaxvq_f64,a: float64x2_t,float64_t,Floating-point maximum pairwise
+TRUE,vmaxvq_s16,a: int16x8_t,i16,Signed maximum across vector
+TRUE,vmaxvq_s32,a: int32x4_t,i32,Signed maximum across vector
+TRUE,vmaxvq_s8,a: int8x16_t,i8,Signed maximum across vector
+TRUE,vmaxvq_u16,a: uint16x8_t,u16,Unsigned maximum across vector
+TRUE,vmaxvq_u32,a: uint32x4_t,u32,Unsigned maximum across vector
+TRUE,vmaxvq_u8,a: uint8x16_t,u8,Unsigned maximum across vector
+FALSE,vmin_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point minimum
+TRUE,vmin_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point minimum
+TRUE,vmin_f64,"a: float64x1_t, b: float64x1_t",float64x1_t,Floating-point minimum
+TRUE,vmin_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Signed minimum
+TRUE,vmin_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Signed minimum
+TRUE,vmin_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Signed minimum
+TRUE,vmin_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Unsigned minimum
+TRUE,vmin_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Unsigned minimum
+TRUE,vmin_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Unsigned minimum
+FALSE,vminh_f16,"a: float16_t, b: float16_t",float16_t,Floating-point minimum
+FALSE,vminnm_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point minimum number
+FALSE,vminnm_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point minimum number
+FALSE,vminnm_f64,"a: float64x1_t, b: float64x1_t",float64x1_t,Floating-point minimum number
+FALSE,vminnmh_f16,"a: float16_t, b: float16_t",float16_t,Floating-point minimum number
+FALSE,vminnmq_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point minimum number
+FALSE,vminnmq_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point minimum number
+FALSE,vminnmq_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point minimum number
+FALSE,vminnmv_f16,a: float16x4_t,float16_t,Floating-point minimum number pairwise
+FALSE,vminnmv_f32,a: float32x2_t,f32,Floating-point minimum number pairwise
+FALSE,vminnmvq_f16,a: float16x8_t,float16_t,Floating-point minimum number pairwise
+FALSE,vminnmvq_f32,a: float32x4_t,f32,Floating-point minimum number across vector
+FALSE,vminnmvq_f64,a: float64x2_t,float64_t,Floating-point minimum number pairwise
+FALSE,vminq_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point minimum
+TRUE,vminq_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point minimum
+FALSE,vminq_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point minimum
+TRUE,vminq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Signed minimum
+TRUE,vminq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Signed minimum
+TRUE,vminq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Signed minimum
+TRUE,vminq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Unsigned minimum
+TRUE,vminq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Unsigned minimum
+TRUE,vminq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Unsigned minimum
+FALSE,vminv_f16,a: float16x4_t,float16_t,Floating-point minimum pairwise
+TRUE,vminv_f32,a: float32x2_t,f32,Floating-point minimum pairwise
+TRUE,vminv_s16,a: int16x4_t,i16,Signed minimum across vector
+TRUE,vminv_s32,a: int32x2_t,i32,Signed minimum pairwise
+TRUE,vminv_s8,a: int8x8_t,i8,Signed minimum across vector
+TRUE,vminv_u16,a: uint16x4_t,u16,Unsigned minimum across vector
+TRUE,vminv_u32,a: uint32x2_t,u32,Unsigned minimum pairwise
+TRUE,vminv_u8,a: uint8x8_t,u8,Unsigned minimum across vector
+FALSE,vminvq_f16,a: float16x8_t,float16_t,Floating-point minimum pairwise
+TRUE,vminvq_f32,a: float32x4_t,f32,Floating-point minimum across vector
+TRUE,vminvq_f64,a: float64x2_t,float64_t,Floating-point minimum pairwise
+TRUE,vminvq_s16,a: int16x8_t,i16,Signed minimum across vector
+TRUE,vminvq_s32,a: int32x4_t,i32,Signed minimum across vector
+TRUE,vminvq_s8,a: int8x16_t,i8,Signed minimum across vector
+TRUE,vminvq_u16,a: uint16x8_t,u16,Unsigned minimum across vector
+TRUE,vminvq_u32,a: uint32x4_t,u32,Unsigned minimum across vector
+TRUE,vminvq_u8,a: uint8x16_t,u8,Unsigned minimum across vector
+TRUE,vmla_f32,"a: float32x2_t, b: float32x2_t, c: float32x2_t",float32x2_t,Floating-point multiply-add to accumulator
+TRUE,vmla_f64,"a: float64x1_t, b: float64x1_t, c: float64x1_t",float64x1_t,Floating-point multiply-add to accumulator
+FALSE,vmla_lane_f32,"a: float32x2_t, b: float32x2_t, v: float32x2_t, lane: const int",float32x2_t,Vector multiply accumulate with scalar
+FALSE,vmla_lane_s16,"a: int16x4_t, b: int16x4_t, v: int16x4_t, lane: const int",int16x4_t,Vector multiply accumulate with scalar
+FALSE,vmla_lane_s32,"a: int32x2_t, b: int32x2_t, v: int32x2_t, lane: const int",int32x2_t,Vector multiply accumulate with scalar
+FALSE,vmla_lane_u16,"a: uint16x4_t, b: uint16x4_t, v: uint16x4_t, lane: const int",uint16x4_t,Vector multiply accumulate with scalar
+FALSE,vmla_lane_u32,"a: uint32x2_t, b: uint32x2_t, v: uint32x2_t, lane: const int",uint32x2_t,Vector multiply accumulate with scalar
+FALSE,vmla_laneq_f32,"a: float32x2_t, b: float32x2_t, v: float32x4_t, lane: const int",float32x2_t,Multiply-Add to accumulator
+FALSE,vmla_laneq_s16,"a: int16x4_t, b: int16x4_t, v: int16x8_t, lane: const int",int16x4_t,Multiply-add to accumulator
+FALSE,vmla_laneq_s32,"a: int32x2_t, b: int32x2_t, v: int32x4_t, lane: const int",int32x2_t,Multiply-add to accumulator
+FALSE,vmla_laneq_u16,"a: uint16x4_t, b: uint16x4_t, v: uint16x8_t, lane: const int",uint16x4_t,Multiply-add to accumulator
+FALSE,vmla_laneq_u32,"a: uint32x2_t, b: uint32x2_t, v: uint32x4_t, lane: const int",uint32x2_t,Multiply-add to accumulator
+FALSE,vmla_n_f32,"a: float32x2_t, b: float32x2_t, c: f32",float32x2_t,Vector multiply accumulate with scalar
+FALSE,vmla_n_s16,"a: int16x4_t, b: int16x4_t, c: i16",int16x4_t,Vector multiply accumulate with scalar
+FALSE,vmla_n_s32,"a: int32x2_t, b: int32x2_t, c: i32",int32x2_t,Vector multiply accumulate with scalar
+FALSE,vmla_n_u16,"a: uint16x4_t, b: uint16x4_t, c: u16",uint16x4_t,Vector multiply accumulate with scalar
+FALSE,vmla_n_u32,"a: uint32x2_t, b: uint32x2_t, c: u32",uint32x2_t,Vector multiply accumulate with scalar
+TRUE,vmla_s16,"a: int16x4_t, b: int16x4_t, c: int16x4_t",int16x4_t,Multiply-add to accumulator
+TRUE,vmla_s32,"a: int32x2_t, b: int32x2_t, c: int32x2_t",int32x2_t,Multiply-add to accumulator
+TRUE,vmla_s8,"a: int8x8_t, b: int8x8_t, c: int8x8_t",int8x8_t,Multiply-add to accumulator
+TRUE,vmla_u16,"a: uint16x4_t, b: uint16x4_t, c: uint16x4_t",uint16x4_t,Multiply-add to accumulator
+TRUE,vmla_u32,"a: uint32x2_t, b: uint32x2_t, c: uint32x2_t",uint32x2_t,Multiply-add to accumulator
+TRUE,vmla_u8,"a: uint8x8_t, b: uint8x8_t, c: uint8x8_t",uint8x8_t,Multiply-add to accumulator
+FALSE,vmlal_high_lane_s16,"a: int32x4_t, b: int16x8_t, v: int16x4_t, lane: const int",int32x4_t,Signed multiply-add long
+FALSE,vmlal_high_lane_s32,"a: int64x2_t, b: int32x4_t, v: int32x2_t, lane: const int",int64x2_t,Signed multiply-add long
+FALSE,vmlal_high_lane_u16,"a: uint32x4_t, b: uint16x8_t, v: uint16x4_t, lane: const int",uint32x4_t,Unsigned multiply-add long
+FALSE,vmlal_high_lane_u32,"a: uint64x2_t, b: uint32x4_t, v: uint32x2_t, lane: const int",uint64x2_t,Unsigned multiply-add long
+FALSE,vmlal_high_laneq_s16,"a: int32x4_t, b: int16x8_t, v: int16x8_t, lane: const int",int32x4_t,Signed multiply-add long
+FALSE,vmlal_high_laneq_s32,"a: int64x2_t, b: int32x4_t, v: int32x4_t, lane: const int",int64x2_t,Signed multiply-add long
+FALSE,vmlal_high_laneq_u16,"a: uint32x4_t, b: uint16x8_t, v: uint16x8_t, lane: const int",uint32x4_t,Unsigned multiply-add long
+FALSE,vmlal_high_laneq_u32,"a: uint64x2_t, b: uint32x4_t, v: uint32x4_t, lane: const int",uint64x2_t,Unsigned multiply-add long
+FALSE,vmlal_high_n_s16,"a: int32x4_t, b: int16x8_t, c: i16",int32x4_t,Signed multiply-add long
+FALSE,vmlal_high_n_s32,"a: int64x2_t, b: int32x4_t, c: i32",int64x2_t,Signed multiply-add long
+FALSE,vmlal_high_n_u16,"a: uint32x4_t, b: uint16x8_t, c: u16",uint32x4_t,Unsigned multiply-add long
+FALSE,vmlal_high_n_u32,"a: uint64x2_t, b: uint32x4_t, c: u32",uint64x2_t,Unsigned multiply-add long
+TRUE,vmlal_high_s16,"a: int32x4_t, b: int16x8_t, c: int16x8_t",int32x4_t,Signed multiply-add long
+TRUE,vmlal_high_s32,"a: int64x2_t, b: int32x4_t, c: int32x4_t",int64x2_t,Signed multiply-add long
+TRUE,vmlal_high_s8,"a: int16x8_t, b: int8x16_t, c: int8x16_t",int16x8_t,Signed multiply-add long
+TRUE,vmlal_high_u16,"a: uint32x4_t, b: uint16x8_t, c: uint16x8_t",uint32x4_t,Unsigned multiply-add long
+TRUE,vmlal_high_u32,"a: uint64x2_t, b: uint32x4_t, c: uint32x4_t",uint64x2_t,Unsigned multiply-add long
+TRUE,vmlal_high_u8,"a: uint16x8_t, b: uint8x16_t, c: uint8x16_t",uint16x8_t,Unsigned multiply-add long
+FALSE,vmlal_lane_s16,"a: int32x4_t, b: int16x4_t, v: int16x4_t, lane: const int",int32x4_t,Vector widening multiply accumulate with scalar
+FALSE,vmlal_lane_s32,"a: int64x2_t, b: int32x2_t, v: int32x2_t, lane: const int",int64x2_t,Vector widening multiply accumulate with scalar
+FALSE,vmlal_lane_u16,"a: uint32x4_t, b: uint16x4_t, v: uint16x4_t, lane: const int",uint32x4_t,Vector widening multiply accumulate with scalar
+FALSE,vmlal_lane_u32,"a: uint64x2_t, b: uint32x2_t, v: uint32x2_t, lane: const int",uint64x2_t,Vector widening multiply accumulate with scalar
+FALSE,vmlal_laneq_s16,"a: int32x4_t, b: int16x4_t, v: int16x8_t, lane: const int",int32x4_t,Signed multiply-add long
+FALSE,vmlal_laneq_s32,"a: int64x2_t, b: int32x2_t, v: int32x4_t, lane: const int",int64x2_t,Signed multiply-add long
+FALSE,vmlal_laneq_u16,"a: uint32x4_t, b: uint16x4_t, v: uint16x8_t, lane: const int",uint32x4_t,Unsigned multiply-add long
+FALSE,vmlal_laneq_u32,"a: uint64x2_t, b: uint32x2_t, v: uint32x4_t, lane: const int",uint64x2_t,Unsigned multiply-add long
+FALSE,vmlal_n_s16,"a: int32x4_t, b: int16x4_t, c: i16",int32x4_t,Vector widening multiply accumulate with scalar
+FALSE,vmlal_n_s32,"a: int64x2_t, b: int32x2_t, c: i32",int64x2_t,Vector widening multiply accumulate with scalar
+FALSE,vmlal_n_u16,"a: uint32x4_t, b: uint16x4_t, c: u16",uint32x4_t,Vector widening multiply accumulate with scalar
+FALSE,vmlal_n_u32,"a: uint64x2_t, b: uint32x2_t, c: u32",uint64x2_t,Vector widening multiply accumulate with scalar
+TRUE,vmlal_s16,"a: int32x4_t, b: int16x4_t, c: int16x4_t",int32x4_t,Signed multiply-add long
+TRUE,vmlal_s32,"a: int64x2_t, b: int32x2_t, c: int32x2_t",int64x2_t,Signed multiply-add long
+TRUE,vmlal_s8,"a: int16x8_t, b: int8x8_t, c: int8x8_t",int16x8_t,Signed multiply-add long
+TRUE,vmlal_u16,"a: uint32x4_t, b: uint16x4_t, c: uint16x4_t",uint32x4_t,Unsigned multiply-add long
+TRUE,vmlal_u32,"a: uint64x2_t, b: uint32x2_t, c: uint32x2_t",uint64x2_t,Unsigned multiply-add long
+TRUE,vmlal_u8,"a: uint16x8_t, b: uint8x8_t, c: uint8x8_t",uint16x8_t,Unsigned multiply-add long
+TRUE,vmlaq_f32,"a: float32x4_t, b: float32x4_t, c: float32x4_t",float32x4_t,Floating-point multiply-add to accumulator
+TRUE,vmlaq_f64,"a: float64x2_t, b: float64x2_t, c: float64x2_t",float64x2_t,Floating-point multiply-add to accumulator
+FALSE,vmlaq_lane_f32,"a: float32x4_t, b: float32x4_t, v: float32x2_t, lane: const int",float32x4_t,Vector multiply accumulate with scalar
+FALSE,vmlaq_lane_s16,"a: int16x8_t, b: int16x8_t, v: int16x4_t, lane: const int",int16x8_t,Vector multiply accumulate with scalar
+FALSE,vmlaq_lane_s32,"a: int32x4_t, b: int32x4_t, v: int32x2_t, lane: const int",int32x4_t,Vector multiply accumulate with scalar
+FALSE,vmlaq_lane_u16,"a: uint16x8_t, b: uint16x8_t, v: uint16x4_t, lane: const int",uint16x8_t,Vector multiply accumulate with scalar
+FALSE,vmlaq_lane_u32,"a: uint32x4_t, b: uint32x4_t, v: uint32x2_t, lane: const int",uint32x4_t,Vector multiply accumulate with scalar
+FALSE,vmlaq_laneq_f32,"a: float32x4_t, b: float32x4_t, v: float32x4_t, lane: const int",float32x4_t,Multiply-Add to accumulator
+FALSE,vmlaq_laneq_s16,"a: int16x8_t, b: int16x8_t, v: int16x8_t, lane: const int",int16x8_t,Multiply-add to accumulator
+FALSE,vmlaq_laneq_s32,"a: int32x4_t, b: int32x4_t, v: int32x4_t, lane: const int",int32x4_t,Multiply-add to accumulator
+FALSE,vmlaq_laneq_u16,"a: uint16x8_t, b: uint16x8_t, v: uint16x8_t, lane: const int",uint16x8_t,Multiply-add to accumulator
+FALSE,vmlaq_laneq_u32,"a: uint32x4_t, b: uint32x4_t, v: uint32x4_t, lane: const int",uint32x4_t,Multiply-add to accumulator
+FALSE,vmlaq_n_f32,"a: float32x4_t, b: float32x4_t, c: f32",float32x4_t,Vector multiply accumulate with scalar
+FALSE,vmlaq_n_s16,"a: int16x8_t, b: int16x8_t, c: i16",int16x8_t,Vector multiply accumulate with scalar
+FALSE,vmlaq_n_s32,"a: int32x4_t, b: int32x4_t, c: i32",int32x4_t,Vector multiply accumulate with scalar
+FALSE,vmlaq_n_u16,"a: uint16x8_t, b: uint16x8_t, c: u16",uint16x8_t,Vector multiply accumulate with scalar
+FALSE,vmlaq_n_u32,"a: uint32x4_t, b: uint32x4_t, c: u32",uint32x4_t,Vector multiply accumulate with scalar
+TRUE,vmlaq_s16,"a: int16x8_t, b: int16x8_t, c: int16x8_t",int16x8_t,Multiply-add to accumulator
+TRUE,vmlaq_s32,"a: int32x4_t, b: int32x4_t, c: int32x4_t",int32x4_t,Multiply-add to accumulator
+TRUE,vmlaq_s8,"a: int8x16_t, b: int8x16_t, c: int8x16_t",int8x16_t,Multiply-add to accumulator
+TRUE,vmlaq_u16,"a: uint16x8_t, b: uint16x8_t, c: uint16x8_t",uint16x8_t,Multiply-add to accumulator
+TRUE,vmlaq_u32,"a: uint32x4_t, b: uint32x4_t, c: uint32x4_t",uint32x4_t,Multiply-add to accumulator
+TRUE,vmlaq_u8,"a: uint8x16_t, b: uint8x16_t, c: uint8x16_t",uint8x16_t,Multiply-add to accumulator
+TRUE,vmls_f32,"a: float32x2_t, b: float32x2_t, c: float32x2_t",float32x2_t,Multiply-subtract from accumulator
+TRUE,vmls_f64,"a: float64x1_t, b: float64x1_t, c: float64x1_t",float64x1_t,Multiply-subtract from accumulator
+FALSE,vmls_lane_f32,"a: float32x2_t, b: float32x2_t, v: float32x2_t, lane: const int",float32x2_t,Vector multiply subtract with scalar
+FALSE,vmls_lane_s16,"a: int16x4_t, b: int16x4_t, v: int16x4_t, lane: const int",int16x4_t,Vector multiply subtract with scalar
+FALSE,vmls_lane_s32,"a: int32x2_t, b: int32x2_t, v: int32x2_t, lane: const int",int32x2_t,Vector multiply subtract with scalar
+FALSE,vmls_lane_u16,"a: uint16x4_t, b: uint16x4_t, v: uint16x4_t, lane: const int",uint16x4_t,Vector multiply subtract with scalar
+FALSE,vmls_lane_u32,"a: uint32x2_t, b: uint32x2_t, v: uint32x2_t, lane: const int",uint32x2_t,Vector multiply subtract with scalar
+FALSE,vmls_laneq_f32,"a: float32x2_t, b: float32x2_t, v: float32x4_t, lane: const int",float32x2_t,Multiply-subtract from accumulator
+FALSE,vmls_laneq_s16,"a: int16x4_t, b: int16x4_t, v: int16x8_t, lane: const int",int16x4_t,Multiply-subtract from accumulator
+FALSE,vmls_laneq_s32,"a: int32x2_t, b: int32x2_t, v: int32x4_t, lane: const int",int32x2_t,Multiply-subtract from accumulator
+FALSE,vmls_laneq_u16,"a: uint16x4_t, b: uint16x4_t, v: uint16x8_t, lane: const int",uint16x4_t,Multiply-subtract from accumulator
+FALSE,vmls_laneq_u32,"a: uint32x2_t, b: uint32x2_t, v: uint32x4_t, lane: const int",uint32x2_t,Multiply-subtract from accumulator
+FALSE,vmls_n_f32,"a: float32x2_t, b: float32x2_t, c: f32",float32x2_t,Vector multiply subtract with scalar
+FALSE,vmls_n_s16,"a: int16x4_t, b: int16x4_t, c: i16",int16x4_t,Vector multiply subtract with scalar
+FALSE,vmls_n_s32,"a: int32x2_t, b: int32x2_t, c: i32",int32x2_t,Vector multiply subtract with scalar
+FALSE,vmls_n_u16,"a: uint16x4_t, b: uint16x4_t, c: u16",uint16x4_t,Vector multiply subtract with scalar
+FALSE,vmls_n_u32,"a: uint32x2_t, b: uint32x2_t, c: u32",uint32x2_t,Vector multiply subtract with scalar
+TRUE,vmls_s16,"a: int16x4_t, b: int16x4_t, c: int16x4_t",int16x4_t,Multiply-subtract from accumulator
+TRUE,vmls_s32,"a: int32x2_t, b: int32x2_t, c: int32x2_t",int32x2_t,Multiply-subtract from accumulator
+TRUE,vmls_s8,"a: int8x8_t, b: int8x8_t, c: int8x8_t",int8x8_t,Multiply-subtract from accumulator
+TRUE,vmls_u16,"a: uint16x4_t, b: uint16x4_t, c: uint16x4_t",uint16x4_t,Multiply-subtract from accumulator
+TRUE,vmls_u32,"a: uint32x2_t, b: uint32x2_t, c: uint32x2_t",uint32x2_t,Multiply-subtract from accumulator
+TRUE,vmls_u8,"a: uint8x8_t, b: uint8x8_t, c: uint8x8_t",uint8x8_t,Multiply-subtract from accumulator
+FALSE,vmlsl_high_lane_s16,"a: int32x4_t, b: int16x8_t, v: int16x4_t, lane: const int",int32x4_t,Signed multiply-subtract long
+FALSE,vmlsl_high_lane_s32,"a: int64x2_t, b: int32x4_t, v: int32x2_t, lane: const int",int64x2_t,Signed multiply-subtract long
+FALSE,vmlsl_high_lane_u16,"a: uint32x4_t, b: uint16x8_t, v: uint16x4_t, lane: const int",uint32x4_t,Unsigned multiply-subtract long
+FALSE,vmlsl_high_lane_u32,"a: uint64x2_t, b: uint32x4_t, v: uint32x2_t, lane: const int",uint64x2_t,Unsigned multiply-subtract long
+FALSE,vmlsl_high_laneq_s16,"a: int32x4_t, b: int16x8_t, v: int16x8_t, lane: const int",int32x4_t,Signed multiply-subtract long
+FALSE,vmlsl_high_laneq_s32,"a: int64x2_t, b: int32x4_t, v: int32x4_t, lane: const int",int64x2_t,Signed multiply-subtract long
+FALSE,vmlsl_high_laneq_u16,"a: uint32x4_t, b: uint16x8_t, v: uint16x8_t, lane: const int",uint32x4_t,Unsigned multiply-subtract long
+FALSE,vmlsl_high_laneq_u32,"a: uint64x2_t, b: uint32x4_t, v: uint32x4_t, lane: const int",uint64x2_t,Unsigned multiply-subtract long
+FALSE,vmlsl_high_n_s16,"a: int32x4_t, b: int16x8_t, c: i16",int32x4_t,Signed multiply-subtract long
+FALSE,vmlsl_high_n_s32,"a: int64x2_t, b: int32x4_t, c: i32",int64x2_t,Signed multiply-subtract long
+FALSE,vmlsl_high_n_u16,"a: uint32x4_t, b: uint16x8_t, c: u16",uint32x4_t,Unsigned multiply-subtract long
+FALSE,vmlsl_high_n_u32,"a: uint64x2_t, b: uint32x4_t, c: u32",uint64x2_t,Unsigned multiply-subtract long
+TRUE,vmlsl_high_s16,"a: int32x4_t, b: int16x8_t, c: int16x8_t",int32x4_t,Signed multiply-subtract long
+TRUE,vmlsl_high_s32,"a: int64x2_t, b: int32x4_t, c: int32x4_t",int64x2_t,Signed multiply-subtract long
+TRUE,vmlsl_high_s8,"a: int16x8_t, b: int8x16_t, c: int8x16_t",int16x8_t,Signed multiply-subtract long
+TRUE,vmlsl_high_u16,"a: uint32x4_t, b: uint16x8_t, c: uint16x8_t",uint32x4_t,Unsigned multiply-subtract long
+TRUE,vmlsl_high_u32,"a: uint64x2_t, b: uint32x4_t, c: uint32x4_t",uint64x2_t,Unsigned multiply-subtract long
+TRUE,vmlsl_high_u8,"a: uint16x8_t, b: uint8x16_t, c: uint8x16_t",uint16x8_t,Unsigned multiply-subtract long
+FALSE,vmlsl_lane_s16,"a: int32x4_t, b: int16x4_t, v: int16x4_t, lane: const int",int32x4_t,Vector widening multiply subtract with scalar
+FALSE,vmlsl_lane_s32,"a: int64x2_t, b: int32x2_t, v: int32x2_t, lane: const int",int64x2_t,Vector widening multiply subtract with scalar
+FALSE,vmlsl_lane_u16,"a: uint32x4_t, b: uint16x4_t, v: uint16x4_t, lane: const int",uint32x4_t,Vector widening multiply subtract with scalar
+FALSE,vmlsl_lane_u32,"a: uint64x2_t, b: uint32x2_t, v: uint32x2_t, lane: const int",uint64x2_t,Vector widening multiply subtract with scalar
+FALSE,vmlsl_laneq_s16,"a: int32x4_t, b: int16x4_t, v: int16x8_t, lane: const int",int32x4_t,Signed multiply-subtract long
+FALSE,vmlsl_laneq_s32,"a: int64x2_t, b: int32x2_t, v: int32x4_t, lane: const int",int64x2_t,Signed multiply-subtract long
+FALSE,vmlsl_laneq_u16,"a: uint32x4_t, b: uint16x4_t, v: uint16x8_t, lane: const int",uint32x4_t,Unsigned multiply-subtract long
+FALSE,vmlsl_laneq_u32,"a: uint64x2_t, b: uint32x2_t, v: uint32x4_t, lane: const int",uint64x2_t,Unsigned multiply-subtract long
+FALSE,vmlsl_n_s16,"a: int32x4_t, b: int16x4_t, c: i16",int32x4_t,Vector widening multiply subtract with scalar
+FALSE,vmlsl_n_s32,"a: int64x2_t, b: int32x2_t, c: i32",int64x2_t,Vector widening multiply subtract with scalar
+FALSE,vmlsl_n_u16,"a: uint32x4_t, b: uint16x4_t, c: u16",uint32x4_t,Vector widening multiply subtract with scalar
+FALSE,vmlsl_n_u32,"a: uint64x2_t, b: uint32x2_t, c: u32",uint64x2_t,Vector widening multiply subtract with scalar
+TRUE,vmlsl_s16,"a: int32x4_t, b: int16x4_t, c: int16x4_t",int32x4_t,Signed multiply-subtract long
+TRUE,vmlsl_s32,"a: int64x2_t, b: int32x2_t, c: int32x2_t",int64x2_t,Signed multiply-subtract long
+TRUE,vmlsl_s8,"a: int16x8_t, b: int8x8_t, c: int8x8_t",int16x8_t,Signed multiply-subtract long
+TRUE,vmlsl_u16,"a: uint32x4_t, b: uint16x4_t, c: uint16x4_t",uint32x4_t,Unsigned multiply-subtract long
+TRUE,vmlsl_u32,"a: uint64x2_t, b: uint32x2_t, c: uint32x2_t",uint64x2_t,Unsigned multiply-subtract long
+TRUE,vmlsl_u8,"a: uint16x8_t, b: uint8x8_t, c: uint8x8_t",uint16x8_t,Unsigned multiply-subtract long
+TRUE,vmlsq_f32,"a: float32x4_t, b: float32x4_t, c: float32x4_t",float32x4_t,Multiply-subtract from accumulator
+TRUE,vmlsq_f64,"a: float64x2_t, b: float64x2_t, c: float64x2_t",float64x2_t,Multiply-subtract from accumulator
+FALSE,vmlsq_lane_f32,"a: float32x4_t, b: float32x4_t, v: float32x2_t, lane: const int",float32x4_t,Vector multiply subtract with scalar
+FALSE,vmlsq_lane_s16,"a: int16x8_t, b: int16x8_t, v: int16x4_t, lane: const int",int16x8_t,Vector multiply subtract with scalar
+FALSE,vmlsq_lane_s32,"a: int32x4_t, b: int32x4_t, v: int32x2_t, lane: const int",int32x4_t,Vector multiply subtract with scalar
+FALSE,vmlsq_lane_u16,"a: uint16x8_t, b: uint16x8_t, v: uint16x4_t, lane: const int",uint16x8_t,Vector multiply subtract with scalar
+FALSE,vmlsq_lane_u32,"a: uint32x4_t, b: uint32x4_t, v: uint32x2_t, lane: const int",uint32x4_t,Vector multiply subtract with scalar
+FALSE,vmlsq_laneq_f32,"a: float32x4_t, b: float32x4_t, v: float32x4_t, lane: const int",float32x4_t,Multiply-subtract from accumulator
+FALSE,vmlsq_laneq_s16,"a: int16x8_t, b: int16x8_t, v: int16x8_t, lane: const int",int16x8_t,Multiply-subtract from accumulator
+FALSE,vmlsq_laneq_s32,"a: int32x4_t, b: int32x4_t, v: int32x4_t, lane: const int",int32x4_t,Multiply-subtract from accumulator
+FALSE,vmlsq_laneq_u16,"a: uint16x8_t, b: uint16x8_t, v: uint16x8_t, lane: const int",uint16x8_t,Multiply-subtract from accumulator
+FALSE,vmlsq_laneq_u32,"a: uint32x4_t, b: uint32x4_t, v: uint32x4_t, lane: const int",uint32x4_t,Multiply-subtract from accumulator
+FALSE,vmlsq_n_f32,"a: float32x4_t, b: float32x4_t, c: f32",float32x4_t,Vector multiply subtract with scalar
+FALSE,vmlsq_n_s16,"a: int16x8_t, b: int16x8_t, c: i16",int16x8_t,Vector multiply subtract with scalar
+FALSE,vmlsq_n_s32,"a: int32x4_t, b: int32x4_t, c: i32",int32x4_t,Vector multiply subtract with scalar
+FALSE,vmlsq_n_u16,"a: uint16x8_t, b: uint16x8_t, c: u16",uint16x8_t,Vector multiply subtract with scalar
+FALSE,vmlsq_n_u32,"a: uint32x4_t, b: uint32x4_t, c: u32",uint32x4_t,Vector multiply subtract with scalar
+TRUE,vmlsq_s16,"a: int16x8_t, b: int16x8_t, c: int16x8_t",int16x8_t,Multiply-subtract from accumulator
+TRUE,vmlsq_s32,"a: int32x4_t, b: int32x4_t, c: int32x4_t",int32x4_t,Multiply-subtract from accumulator
+TRUE,vmlsq_s8,"a: int8x16_t, b: int8x16_t, c: int8x16_t",int8x16_t,Multiply-subtract from accumulator
+TRUE,vmlsq_u16,"a: uint16x8_t, b: uint16x8_t, c: uint16x8_t",uint16x8_t,Multiply-subtract from accumulator
+TRUE,vmlsq_u32,"a: uint32x4_t, b: uint32x4_t, c: uint32x4_t",uint32x4_t,Multiply-subtract from accumulator
+TRUE,vmlsq_u8,"a: uint8x16_t, b: uint8x16_t, c: uint8x16_t",uint8x16_t,Multiply-subtract from accumulator
+FALSE,vmmlaq_s32,"r: int32x4_t, a: int8x16_t, b: int8x16_t",int32x4_t,Signed 8-bit integer matrix multiply-accumulate
+FALSE,vmmlaq_u32,"r: uint32x4_t, a: uint8x16_t, b: uint8x16_t",uint32x4_t,Unsigned 8-bit integer matrix multiply-accumulate
+FALSE,vmov_n_f16,value: float16_t,float16x4_t,Duplicate vector element to vector or scalar
+TRUE,vmov_n_f32,value: f32,float32x2_t,Duplicate vector element to vector or scalar
+TRUE,vmov_n_f64,value: float64_t,float64x1_t,Duplicate vector element to vector or scalar
+TRUE,vmov_n_p16,value: poly16_t,poly16x4_t,Duplicate vector element to vector or scalar
+TRUE,vmov_n_p8,value: poly8_t,poly8x8_t,Duplicate vector element to vector or scalar
+TRUE,vmov_n_s16,value: i16,int16x4_t,Duplicate vector element to vector or scalar
+TRUE,vmov_n_s32,value: i32,int32x2_t,Duplicate vector element to vector or scalar
+TRUE,vmov_n_s64,value: i64,int64x1_t,Duplicate vector element to vector or scalar
+TRUE,vmov_n_s8,value: i8,int8x8_t,Duplicate vector element to vector or scalar
+TRUE,vmov_n_u16,value: u16,uint16x4_t,Duplicate vector element to vector or scalar
+TRUE,vmov_n_u32,value: u32,uint32x2_t,Duplicate vector element to vector or scalar
+TRUE,vmov_n_u64,value: u64,uint64x1_t,Duplicate vector element to vector or scalar
+TRUE,vmov_n_u8,value: u8,uint8x8_t,Duplicate vector element to vector or scalar
+FALSE,vmovl_high_s16,a: int16x8_t,int32x4_t,Vector move
+FALSE,vmovl_high_s32,a: int32x4_t,int64x2_t,Vector move
+FALSE,vmovl_high_s8,a: int8x16_t,int16x8_t,Vector move
+FALSE,vmovl_high_u16,a: uint16x8_t,uint32x4_t,Vector move
+FALSE,vmovl_high_u32,a: uint32x4_t,uint64x2_t,Vector move
+FALSE,vmovl_high_u8,a: uint8x16_t,uint16x8_t,Vector move
+TRUE,vmovl_s16,a: int16x4_t,int32x4_t,Vector move
+TRUE,vmovl_s32,a: int32x2_t,int64x2_t,Vector move
+TRUE,vmovl_s8,a: int8x8_t,int16x8_t,Vector move
+TRUE,vmovl_u16,a: uint16x4_t,uint32x4_t,Vector move
+TRUE,vmovl_u32,a: uint32x2_t,uint64x2_t,Vector move
+TRUE,vmovl_u8,a: uint8x8_t,uint16x8_t,Vector move
+TRUE,vmovn_high_s16,"r: int8x8_t, a: int16x8_t",int8x16_t,Extract narrow
+TRUE,vmovn_high_s32,"r: int16x4_t, a: int32x4_t",int16x8_t,Extract narrow
+TRUE,vmovn_high_s64,"r: int32x2_t, a: int64x2_t",int32x4_t,Extract narrow
+TRUE,vmovn_high_u16,"r: uint8x8_t, a: uint16x8_t",uint8x16_t,Extract narrow
+TRUE,vmovn_high_u32,"r: uint16x4_t, a: uint32x4_t",uint16x8_t,Extract narrow
+TRUE,vmovn_high_u64,"r: uint32x2_t, a: uint64x2_t",uint32x4_t,Extract narrow
+TRUE,vmovn_s16,a: int16x8_t,int8x8_t,Extract narrow
+TRUE,vmovn_s32,a: int32x4_t,int16x4_t,Extract narrow
+TRUE,vmovn_s64,a: int64x2_t,int32x2_t,Extract narrow
+TRUE,vmovn_u16,a: uint16x8_t,uint8x8_t,Extract narrow
+TRUE,vmovn_u32,a: uint32x4_t,uint16x4_t,Extract narrow
+TRUE,vmovn_u64,a: uint64x2_t,uint32x2_t,Extract narrow
+FALSE,vmovq_n_f16,value: float16_t,float16x8_t,Duplicate vector element to vector or scalar
+TRUE,vmovq_n_f32,value: f32,float32x4_t,Duplicate vector element to vector or scalar
+TRUE,vmovq_n_f64,value: float64_t,float64x2_t,Duplicate vector element to vector or scalar
+TRUE,vmovq_n_p16,value: poly16_t,poly16x8_t,Duplicate vector element to vector or scalar
+TRUE,vmovq_n_p8,value: poly8_t,poly8x16_t,Duplicate vector element to vector or scalar
+TRUE,vmovq_n_s16,value: i16,int16x8_t,Duplicate vector element to vector or scalar
+TRUE,vmovq_n_s32,value: i32,int32x4_t,Duplicate vector element to vector or scalar
+TRUE,vmovq_n_s64,value: i64,int64x2_t,Duplicate vector element to vector or scalar
+TRUE,vmovq_n_s8,value: i8,int8x16_t,Duplicate vector element to vector or scalar
+TRUE,vmovq_n_u16,value: u16,uint16x8_t,Duplicate vector element to vector or scalar
+TRUE,vmovq_n_u32,value: u32,uint32x4_t,Duplicate vector element to vector or scalar
+TRUE,vmovq_n_u64,value: u64,uint64x2_t,Duplicate vector element to vector or scalar
+TRUE,vmovq_n_u8,value: u8,uint8x16_t,Duplicate vector element to vector or scalar
+FALSE,vmul_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point multiply
+TRUE,vmul_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point multiply
+TRUE,vmul_f64,"a: float64x1_t, b: float64x1_t",float64x1_t,Floating-point multiply
+FALSE,vmul_lane_f16,"a: float16x4_t, v: float16x4_t, lane: const int",float16x4_t,Floating-point multiply
+FALSE,vmul_lane_f32,"a: float32x2_t, v: float32x2_t, lane: const int",float32x2_t,Floating-point multiply
+FALSE,vmul_lane_f64,"a: float64x1_t, v: float64x1_t, lane: const int",float64x1_t,Floating-point multiply
+FALSE,vmul_lane_s16,"a: int16x4_t, v: int16x4_t, lane: const int",int16x4_t,Multiply
+FALSE,vmul_lane_s32,"a: int32x2_t, v: int32x2_t, lane: const int",int32x2_t,Multiply
+FALSE,vmul_lane_u16,"a: uint16x4_t, v: uint16x4_t, lane: const int",uint16x4_t,Multiply
+FALSE,vmul_lane_u32,"a: uint32x2_t, v: uint32x2_t, lane: const int",uint32x2_t,Multiply
+FALSE,vmul_laneq_f16,"a: float16x4_t, v: float16x8_t, lane: const int",float16x4_t,Floating-point multiply
+FALSE,vmul_laneq_f32,"a: float32x2_t, v: float32x4_t, lane: const int",float32x2_t,Floating-point multiply
+FALSE,vmul_laneq_f64,"a: float64x1_t, v: float64x2_t, lane: const int",float64x1_t,Floating-point multiply
+FALSE,vmul_laneq_s16,"a: int16x4_t, v: int16x8_t, lane: const int",int16x4_t,Multiply
+FALSE,vmul_laneq_s32,"a: int32x2_t, v: int32x4_t, lane: const int",int32x2_t,Multiply
+FALSE,vmul_laneq_u16,"a: uint16x4_t, v: uint16x8_t, lane: const int",uint16x4_t,Multiply
+FALSE,vmul_laneq_u32,"a: uint32x2_t, v: uint32x4_t, lane: const int",uint32x2_t,Multiply
+FALSE,vmul_n_f16,"a: float16x4_t, n: float16_t",float16x4_t,Floating-point multiply
+FALSE,vmul_n_f32,"a: float32x2_t, b: f32",float32x2_t,Vector multiply by scalar
+FALSE,vmul_n_f64,"a: float64x1_t, b: float64_t",float64x1_t,Floating-point multiply
+FALSE,vmul_n_s16,"a: int16x4_t, b: i16",int16x4_t,Vector multiply by scalar
+FALSE,vmul_n_s32,"a: int32x2_t, b: i32",int32x2_t,Vector multiply by scalar
+FALSE,vmul_n_u16,"a: uint16x4_t, b: u16",uint16x4_t,Vector multiply by scalar
+FALSE,vmul_n_u32,"a: uint32x2_t, b: u32",uint32x2_t,Vector multiply by scalar
+FALSE,vmul_p8,"a: poly8x8_t, b: poly8x8_t",poly8x8_t,Polynomial multiply
+TRUE,vmul_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Multiply
+TRUE,vmul_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Multiply
+TRUE,vmul_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Multiply
+TRUE,vmul_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Multiply
+TRUE,vmul_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Multiply
+TRUE,vmul_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Multiply
+FALSE,vmuld_lane_f64,"a: float64_t, v: float64x1_t, lane: const int",float64_t,Floating-point multiply
+FALSE,vmuld_laneq_f64,"a: float64_t, v: float64x2_t, lane: const int",float64_t,Floating-point multiply
+FALSE,vmulh_f16,"a: float16_t, b: float16_t",float16_t,Floating-point multiply
+FALSE,vmulh_lane_f16,"a: float16_t, v: float16x4_t, lane: const int",float16_t,Floating-point multiply
+FALSE,vmulh_laneq_f16,"a: float16_t, v: float16x8_t, lane: const int",float16_t,Floating-point multiply
+FALSE,vmull_high_lane_s16,"a: int16x8_t, v: int16x4_t, lane: const int",int32x4_t,Signed multiply long
+FALSE,vmull_high_lane_s32,"a: int32x4_t, v: int32x2_t, lane: const int",int64x2_t,Signed multiply long
+FALSE,vmull_high_lane_u16,"a: uint16x8_t, v: uint16x4_t, lane: const int",uint32x4_t,Unsigned multiply long
+FALSE,vmull_high_lane_u32,"a: uint32x4_t, v: uint32x2_t, lane: const int",uint64x2_t,Unsigned multiply long
+FALSE,vmull_high_laneq_s16,"a: int16x8_t, v: int16x8_t, lane: const int",int32x4_t,Signed multiply long
+FALSE,vmull_high_laneq_s32,"a: int32x4_t, v: int32x4_t, lane: const int",int64x2_t,Signed multiply long
+FALSE,vmull_high_laneq_u16,"a: uint16x8_t, v: uint16x8_t, lane: const int",uint32x4_t,Unsigned multiply long
+FALSE,vmull_high_laneq_u32,"a: uint32x4_t, v: uint32x4_t, lane: const int",uint64x2_t,Unsigned multiply long
+FALSE,vmull_high_n_s16,"a: int16x8_t, b: i16",int32x4_t,Signed multiply long
+FALSE,vmull_high_n_s32,"a: int32x4_t, b: i32",int64x2_t,Signed multiply long
+FALSE,vmull_high_n_u16,"a: uint16x8_t, b: u16",uint32x4_t,Unsigned multiply long
+FALSE,vmull_high_n_u32,"a: uint32x4_t, b: u32",uint64x2_t,Unsigned multiply long
+FALSE,vmull_high_p64,"a: poly64x2_t, b: poly64x2_t",poly128_t,Polynomial multiply long
+TRUE,vmull_high_p8,"a: poly8x16_t, b: poly8x16_t",poly16x8_t,Polynomial multiply long
+TRUE,vmull_high_s16,"a: int16x8_t, b: int16x8_t",int32x4_t,Signed multiply long
+TRUE,vmull_high_s32,"a: int32x4_t, b: int32x4_t",int64x2_t,Signed multiply long
+TRUE,vmull_high_s8,"a: int8x16_t, b: int8x16_t",int16x8_t,Signed multiply long
+TRUE,vmull_high_u16,"a: uint16x8_t, b: uint16x8_t",uint32x4_t,Unsigned multiply long
+TRUE,vmull_high_u32,"a: uint32x4_t, b: uint32x4_t",uint64x2_t,Unsigned multiply long
+TRUE,vmull_high_u8,"a: uint8x16_t, b: uint8x16_t",uint16x8_t,Unsigned multiply long
+FALSE,vmull_lane_s16,"a: int16x4_t, v: int16x4_t, lane: const int",int32x4_t,Vector long multiply by scalar
+FALSE,vmull_lane_s32,"a: int32x2_t, v: int32x2_t, lane: const int",int64x2_t,Vector long multiply by scalar
+FALSE,vmull_lane_u16,"a: uint16x4_t, v: uint16x4_t, lane: const int",uint32x4_t,Vector long multiply by scalar
+FALSE,vmull_lane_u32,"a: uint32x2_t, v: uint32x2_t, lane: const int",uint64x2_t,Vector long multiply by scalar
+FALSE,vmull_laneq_s16,"a: int16x4_t, v: int16x8_t, lane: const int",int32x4_t,Signed multiply long
+FALSE,vmull_laneq_s32,"a: int32x2_t, v: int32x4_t, lane: const int",int64x2_t,Signed multiply long
+FALSE,vmull_laneq_u16,"a: uint16x4_t, v: uint16x8_t, lane: const int",uint32x4_t,Unsigned multiply long
+FALSE,vmull_laneq_u32,"a: uint32x2_t, v: uint32x4_t, lane: const int",uint64x2_t,Unsigned multiply long
+FALSE,vmull_n_s16,"a: int16x4_t, b: i16",int32x4_t,Vector long multiply with scalar
+FALSE,vmull_n_s32,"a: int32x2_t, b: i32",int64x2_t,Vector long multiply with scalar
+FALSE,vmull_n_u16,"a: uint16x4_t, b: u16",uint32x4_t,Vector long multiply with scalar
+FALSE,vmull_n_u32,"a: uint32x2_t, b: u32",uint64x2_t,Vector long multiply with scalar
+TRUE,vmull_p64,"a: poly64_t, b: poly64_t",poly128_t,Polynomial multiply long
+TRUE,vmull_p8,"a: poly8x8_t, b: poly8x8_t",poly16x8_t,Polynomial multiply long
+TRUE,vmull_s16,"a: int16x4_t, b: int16x4_t",int32x4_t,Signed multiply long
+TRUE,vmull_s32,"a: int32x2_t, b: int32x2_t",int64x2_t,Signed multiply long
+TRUE,vmull_s8,"a: int8x8_t, b: int8x8_t",int16x8_t,Signed multiply long
+TRUE,vmull_u16,"a: uint16x4_t, b: uint16x4_t",uint32x4_t,Unsigned multiply long
+TRUE,vmull_u32,"a: uint32x2_t, b: uint32x2_t",uint64x2_t,Unsigned multiply long
+TRUE,vmull_u8,"a: uint8x8_t, b: uint8x8_t",uint16x8_t,Unsigned multiply long
+FALSE,vmulq_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point multiply
+TRUE,vmulq_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point multiply
+TRUE,vmulq_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point multiply
+FALSE,vmulq_lane_f16,"a: float16x8_t, v: float16x4_t, lane: const int",float16x8_t,Floating-point multiply
+FALSE,vmulq_lane_f32,"a: float32x4_t, v: float32x2_t, lane: const int",float32x4_t,Floating-point multiply
+FALSE,vmulq_lane_f64,"a: float64x2_t, v: float64x1_t, lane: const int",float64x2_t,Floating-point multiply
+FALSE,vmulq_lane_s16,"a: int16x8_t, v: int16x4_t, lane: const int",int16x8_t,Multiply
+FALSE,vmulq_lane_s32,"a: int32x4_t, v: int32x2_t, lane: const int",int32x4_t,Multiply
+FALSE,vmulq_lane_u16,"a: uint16x8_t, v: uint16x4_t, lane: const int",uint16x8_t,Multiply
+FALSE,vmulq_lane_u32,"a: uint32x4_t, v: uint32x2_t, lane: const int",uint32x4_t,Multiply
+FALSE,vmulq_laneq_f16,"a: float16x8_t, v: float16x8_t, lane: const int",float16x8_t,Floating-point multiply
+FALSE,vmulq_laneq_f32,"a: float32x4_t, v: float32x4_t, lane: const int",float32x4_t,Floating-point multiply
+FALSE,vmulq_laneq_f64,"a: float64x2_t, v: float64x2_t, lane: const int",float64x2_t,Floating-point multiply
+FALSE,vmulq_laneq_s16,"a: int16x8_t, v: int16x8_t, lane: const int",int16x8_t,Multiply
+FALSE,vmulq_laneq_s32,"a: int32x4_t, v: int32x4_t, lane: const int",int32x4_t,Multiply
+FALSE,vmulq_laneq_u16,"a: uint16x8_t, v: uint16x8_t, lane: const int",uint16x8_t,Multiply
+FALSE,vmulq_laneq_u32,"a: uint32x4_t, v: uint32x4_t, lane: const int",uint32x4_t,Multiply
+FALSE,vmulq_n_f16,"a: float16x8_t, n: float16_t",float16x8_t,Floating-point multiply
+FALSE,vmulq_n_f32,"a: float32x4_t, b: f32",float32x4_t,Vector multiply by scalar
+FALSE,vmulq_n_f64,"a: float64x2_t, b: float64_t",float64x2_t,Floating-point multiply
+FALSE,vmulq_n_s16,"a: int16x8_t, b: i16",int16x8_t,Vector multiply by scalar
+FALSE,vmulq_n_s32,"a: int32x4_t, b: i32",int32x4_t,Vector multiply by scalar
+FALSE,vmulq_n_u16,"a: uint16x8_t, b: u16",uint16x8_t,Vector multiply by scalar
+FALSE,vmulq_n_u32,"a: uint32x4_t, b: u32",uint32x4_t,Vector multiply by scalar
+FALSE,vmulq_p8,"a: poly8x16_t, b: poly8x16_t",poly8x16_t,Polynomial multiply
+TRUE,vmulq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Multiply
+TRUE,vmulq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Multiply
+TRUE,vmulq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Multiply
+TRUE,vmulq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Multiply
+TRUE,vmulq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Multiply
+TRUE,vmulq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Multiply
+FALSE,vmuls_lane_f32,"a: f32, v: float32x2_t, lane: const int",f32,Floating-point multiply
+FALSE,vmuls_laneq_f32,"a: f32, v: float32x4_t, lane: const int",f32,Floating-point multiply
+FALSE,vmulx_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point multiply extended
+FALSE,vmulx_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point multiply extended
+FALSE,vmulx_f64,"a: float64x1_t, b: float64x1_t",float64x1_t,Floating-point multiply extended
+FALSE,vmulx_lane_f16,"a: float16x4_t, v: float16x4_t, lane: const int",float16x4_t,Floating-point multiply extended
+FALSE,vmulx_lane_f32,"a: float32x2_t, v: float32x2_t, lane: const int",float32x2_t,Floating-point multiply extended
+FALSE,vmulx_lane_f64,"a: float64x1_t, v: float64x1_t, lane: const int",float64x1_t,Floating-point multiply extended
+FALSE,vmulx_laneq_f16,"a: float16x4_t, v: float16x8_t, lane: const int",float16x4_t,Floating-point multiply extended
+FALSE,vmulx_laneq_f32,"a: float32x2_t, v: float32x4_t, lane: const int",float32x2_t,Floating-point multiply extended
+FALSE,vmulx_laneq_f64,"a: float64x1_t, v: float64x2_t, lane: const int",float64x1_t,Floating-point multiply extended
+FALSE,vmulx_n_f16,"a: float16x4_t, n: float16_t",float16x4_t,Floating-point multiply extended
+FALSE,vmulxd_f64,"a: float64_t, b: float64_t",float64_t,Floating-point multiply extended
+FALSE,vmulxd_lane_f64,"a: float64_t, v: float64x1_t, lane: const int",float64_t,Floating-point multiply extended
+FALSE,vmulxd_laneq_f64,"a: float64_t, v: float64x2_t, lane: const int",float64_t,Floating-point multiply extended
+FALSE,vmulxh_f16,"a: float16_t, b: float16_t",float16_t,Floating-point multiply extended
+FALSE,vmulxh_lane_f16,"a: float16_t, v: float16x4_t, lane: const int",float16_t,Floating-point multiply extended
+FALSE,vmulxh_laneq_f16,"a: float16_t, v: float16x8_t, lane: const int",float16_t,Floating-point multiply extended
+FALSE,vmulxq_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point multiply extended
+FALSE,vmulxq_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point multiply extended
+FALSE,vmulxq_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point multiply extended
+FALSE,vmulxq_lane_f16,"a: float16x8_t, v: float16x4_t, lane: const int",float16x8_t,Floating-point multiply extended
+FALSE,vmulxq_lane_f32,"a: float32x4_t, v: float32x2_t, lane: const int",float32x4_t,Floating-point multiply extended
+FALSE,vmulxq_lane_f64,"a: float64x2_t, v: float64x1_t, lane: const int",float64x2_t,Floating-point multiply extended
+FALSE,vmulxq_laneq_f16,"a: float16x8_t, v: float16x8_t, lane: const int",float16x8_t,Floating-point multiply extended
+FALSE,vmulxq_laneq_f32,"a: float32x4_t, v: float32x4_t, lane: const int",float32x4_t,Floating-point multiply extended
+FALSE,vmulxq_laneq_f64,"a: float64x2_t, v: float64x2_t, lane: const int",float64x2_t,Floating-point multiply extended
+FALSE,vmulxq_n_f16,"a: float16x8_t, n: float16_t",float16x8_t,Floating-point multiply extended
+FALSE,vmulxs_f32,"a: f32, b: f32",f32,Floating-point multiply extended
+FALSE,vmulxs_lane_f32,"a: f32, v: float32x2_t, lane: const int",f32,Floating-point multiply extended
+FALSE,vmulxs_laneq_f32,"a: f32, v: float32x4_t, lane: const int",f32,Floating-point multiply extended
+TRUE,vmvn_p8,a: poly8x8_t,poly8x8_t,Bitwise NOT
+TRUE,vmvn_s16,a: int16x4_t,int16x4_t,Bitwise NOT
+TRUE,vmvn_s32,a: int32x2_t,int32x2_t,Bitwise NOT
+TRUE,vmvn_s8,a: int8x8_t,int8x8_t,Bitwise NOT
+TRUE,vmvn_u16,a: uint16x4_t,uint16x4_t,Bitwise NOT
+TRUE,vmvn_u32,a: uint32x2_t,uint32x2_t,Bitwise NOT
+TRUE,vmvn_u8,a: uint8x8_t,uint8x8_t,Bitwise NOT
+TRUE,vmvnq_p8,a: poly8x16_t,poly8x16_t,Bitwise NOT
+TRUE,vmvnq_s16,a: int16x8_t,int16x8_t,Bitwise NOT
+TRUE,vmvnq_s32,a: int32x4_t,int32x4_t,Bitwise NOT
+TRUE,vmvnq_s8,a: int8x16_t,int8x16_t,Bitwise NOT
+TRUE,vmvnq_u16,a: uint16x8_t,uint16x8_t,Bitwise NOT
+TRUE,vmvnq_u32,a: uint32x4_t,uint32x4_t,Bitwise NOT
+TRUE,vmvnq_u8,a: uint8x16_t,uint8x16_t,Bitwise NOT
+FALSE,vneg_f16,a: float16x4_t,float16x4_t,Floating-point negate
+TRUE,vneg_f32,a: float32x2_t,float32x2_t,Floating-point negate
+TRUE,vneg_f64,a: float64x1_t,float64x1_t,Floating-point negate
+TRUE,vneg_s16,a: int16x4_t,int16x4_t,Negate
+TRUE,vneg_s32,a: int32x2_t,int32x2_t,Negate
+TRUE,vneg_s64,a: int64x1_t,int64x1_t,Negate
+TRUE,vneg_s8,a: int8x8_t,int8x8_t,Negate
+FALSE,vnegd_s64,a: i64,i64,Negate
+FALSE,vnegh_f16,a: float16_t,float16_t,Floating-point negate
+FALSE,vnegq_f16,a: float16x8_t,float16x8_t,Floating-point negate
+TRUE,vnegq_f32,a: float32x4_t,float32x4_t,Floating-point negate
+TRUE,vnegq_f64,a: float64x2_t,float64x2_t,Floating-point negate
+TRUE,vnegq_s16,a: int16x8_t,int16x8_t,Negate
+TRUE,vnegq_s32,a: int32x4_t,int32x4_t,Negate
+TRUE,vnegq_s64,a: int64x2_t,int64x2_t,Negate
+TRUE,vnegq_s8,a: int8x16_t,int8x16_t,Negate
+TRUE,vorn_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Bitwise inclusive OR NOT
+TRUE,vorn_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Bitwise inclusive OR NOT
+TRUE,vorn_s64,"a: int64x1_t, b: int64x1_t",int64x1_t,Bitwise inclusive OR NOT
+TRUE,vorn_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Bitwise inclusive OR NOT
+TRUE,vorn_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Bitwise inclusive OR NOT
+TRUE,vorn_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Bitwise inclusive OR NOT
+TRUE,vorn_u64,"a: uint64x1_t, b: uint64x1_t",uint64x1_t,Bitwise inclusive OR NOT
+TRUE,vorn_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Bitwise inclusive OR NOT
+TRUE,vornq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Bitwise inclusive OR NOT
+TRUE,vornq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Bitwise inclusive OR NOT
+TRUE,vornq_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,Bitwise inclusive OR NOT
+TRUE,vornq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Bitwise inclusive OR NOT
+TRUE,vornq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Bitwise inclusive OR NOT
+TRUE,vornq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Bitwise inclusive OR NOT
+TRUE,vornq_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Bitwise inclusive OR NOT
+TRUE,vornq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Bitwise inclusive OR NOT
+TRUE,vorr_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,"Bitwise inclusive OR (vector, immediate)"
+TRUE,vorr_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,"Bitwise inclusive OR (vector, immediate)"
+TRUE,vorr_s64,"a: int64x1_t, b: int64x1_t",int64x1_t,"Bitwise inclusive OR (vector, immediate)"
+TRUE,vorr_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,"Bitwise inclusive OR (vector, immediate)"
+TRUE,vorr_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,"Bitwise inclusive OR (vector, immediate)"
+TRUE,vorr_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,"Bitwise inclusive OR (vector, immediate)"
+TRUE,vorr_u64,"a: uint64x1_t, b: uint64x1_t",uint64x1_t,"Bitwise inclusive OR (vector, immediate)"
+TRUE,vorr_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,"Bitwise inclusive OR (vector, immediate)"
+TRUE,vorrq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,"Bitwise inclusive OR (vector, immediate)"
+TRUE,vorrq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,"Bitwise inclusive OR (vector, immediate)"
+TRUE,vorrq_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,"Bitwise inclusive OR (vector, immediate)"
+TRUE,vorrq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,"Bitwise inclusive OR (vector, immediate)"
+TRUE,vorrq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,"Bitwise inclusive OR (vector, immediate)"
+TRUE,vorrq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,"Bitwise inclusive OR (vector, immediate)"
+TRUE,vorrq_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,"Bitwise inclusive OR (vector, immediate)"
+TRUE,vorrq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,"Bitwise inclusive OR (vector, immediate)"
+TRUE,vpadal_s16,"a: int32x2_t, b: int16x4_t",int32x2_t,Signed add and accumulate long pairwise
+TRUE,vpadal_s32,"a: int64x1_t, b: int32x2_t",int64x1_t,Signed add and accumulate long pairwise
+TRUE,vpadal_s8,"a: int16x4_t, b: int8x8_t",int16x4_t,Signed add and accumulate long pairwise
+FALSE,vpadal_u16,"a: uint32x2_t, b: uint16x4_t",uint32x2_t,Unsigned add and accumulate long pairwise
+FALSE,vpadal_u32,"a: uint64x1_t, b: uint32x2_t",uint64x1_t,Unsigned add and accumulate long pairwise
+FALSE,vpadal_u8,"a: uint16x4_t, b: uint8x8_t",uint16x4_t,Unsigned add and accumulate long pairwise
+FALSE,vpadalq_s16,"a: int32x4_t, b: int16x8_t",int32x4_t,Signed add and accumulate long pairwise
+FALSE,vpadalq_s32,"a: int64x2_t, b: int32x4_t",int64x2_t,Signed add and accumulate long pairwise
+FALSE,vpadalq_s8,"a: int16x8_t, b: int8x16_t",int16x8_t,Signed add and accumulate long pairwise
+FALSE,vpadalq_u16,"a: uint32x4_t, b: uint16x8_t",uint32x4_t,Unsigned add and accumulate long pairwise
+FALSE,vpadalq_u32,"a: uint64x2_t, b: uint32x4_t",uint64x2_t,Unsigned add and accumulate long pairwise
+FALSE,vpadalq_u8,"a: uint16x8_t, b: uint8x16_t",uint16x8_t,Unsigned add and accumulate long pairwise
+FALSE,vpadd_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point add pairwise
+FALSE,vpadd_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point add pairwise
+TRUE,vpadd_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Add pairwise
+TRUE,vpadd_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Add pairwise
+TRUE,vpadd_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Add pairwise
+TRUE,vpadd_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Add pairwise
+TRUE,vpadd_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Add pairwise
+TRUE,vpadd_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Add pairwise
+FALSE,vpaddd_f64,a: float64x2_t,float64_t,Floating-point add pairwise
+TRUE,vpaddd_s64,a: int64x2_t,i64,Add pairwise
+TRUE,vpaddd_u64,a: uint64x2_t,u64,Add pairwise
+FALSE,vpaddl_s16,a: int16x4_t,int32x2_t,Signed add long pairwise
+FALSE,vpaddl_s32,a: int32x2_t,int64x1_t,Signed add long pairwise
+FALSE,vpaddl_s8,a: int8x8_t,int16x4_t,Signed add long pairwise
+FALSE,vpaddl_u16,a: uint16x4_t,uint32x2_t,Unsigned add long pairwise
+FALSE,vpaddl_u32,a: uint32x2_t,uint64x1_t,Unsigned add long pairwise
+FALSE,vpaddl_u8,a: uint8x8_t,uint16x4_t,Unsigned add long pairwise
+FALSE,vpaddlq_s16,a: int16x8_t,int32x4_t,Signed add long pairwise
+FALSE,vpaddlq_s32,a: int32x4_t,int64x2_t,Signed add long pairwise
+FALSE,vpaddlq_s8,a: int8x16_t,int16x8_t,Signed add long pairwise
+FALSE,vpaddlq_u16,a: uint16x8_t,uint32x4_t,Unsigned add long pairwise
+FALSE,vpaddlq_u32,a: uint32x4_t,uint64x2_t,Unsigned add long pairwise
+FALSE,vpaddlq_u8,a: uint8x16_t,uint16x8_t,Unsigned add long pairwise
+FALSE,vpaddq_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point add pairwise
+FALSE,vpaddq_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point add pairwise
+FALSE,vpaddq_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point add pairwise
+TRUE,vpaddq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Add pairwise
+TRUE,vpaddq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Add pairwise
+FALSE,vpaddq_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,Add pairwise
+TRUE,vpaddq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Add pairwise
+TRUE,vpaddq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Add pairwise
+TRUE,vpaddq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Add pairwise
+FALSE,vpaddq_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Add pairwise
+TRUE,vpaddq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Add pairwise
+FALSE,vpadds_f32,a: float32x2_t,f32,Floating-point add pairwise
+FALSE,vpmax_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point maximum pairwise
+TRUE,vpmax_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point maximum pairwise
+TRUE,vpmax_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Signed maximum pairwise
+TRUE,vpmax_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Signed maximum pairwise
+TRUE,vpmax_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Signed maximum pairwise
+TRUE,vpmax_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Unsigned maximum pairwise
+TRUE,vpmax_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Unsigned maximum pairwise
+TRUE,vpmax_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Unsigned maximum pairwise
+FALSE,vpmaxnm_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point maximum number pairwise
+TRUE,vpmaxnm_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point maximum number pairwise
+FALSE,vpmaxnmq_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point maximum number pairwise
+TRUE,vpmaxnmq_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point maximum number pairwise
+TRUE,vpmaxnmq_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point maximum number pairwise
+FALSE,vpmaxnmqd_f64,a: float64x2_t,float64_t,Floating-point maximum number pairwise
+FALSE,vpmaxnms_f32,a: float32x2_t,f32,Floating-point maximum number pairwise
+FALSE,vpmaxq_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point maximum pairwise
+TRUE,vpmaxq_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point maximum pairwise
+TRUE,vpmaxq_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point maximum pairwise
+TRUE,vpmaxq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Signed maximum pairwise
+TRUE,vpmaxq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Signed maximum pairwise
+TRUE,vpmaxq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Signed maximum pairwise
+TRUE,vpmaxq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Unsigned maximum pairwise
+TRUE,vpmaxq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Unsigned maximum pairwise
+TRUE,vpmaxq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Unsigned maximum pairwise
+FALSE,vpmaxqd_f64,a: float64x2_t,float64_t,Floating-point maximum pairwise
+FALSE,vpmaxs_f32,a: float32x2_t,f32,Floating-point maximum pairwise
+FALSE,vpmin_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point minimum pairwise
+TRUE,vpmin_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point minimum pairwise
+TRUE,vpmin_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Signed minimum pairwise
+TRUE,vpmin_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Signed minimum pairwise
+TRUE,vpmin_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Signed minimum pairwise
+TRUE,vpmin_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Unsigned minimum pairwise
+TRUE,vpmin_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Unsigned minimum pairwise
+TRUE,vpmin_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Unsigned minimum pairwise
+FALSE,vpminnm_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point minimum number pairwise
+FALSE,vpminnm_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point minimum number pairwise
+FALSE,vpminnmq_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point minimum number pairwise
+FALSE,vpminnmq_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point minimum number pairwise
+FALSE,vpminnmq_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point minimum number pairwise
+FALSE,vpminnmqd_f64,a: float64x2_t,float64_t,Floating-point minimum number pairwise
+FALSE,vpminnms_f32,a: float32x2_t,f32,Floating-point minimum number pairwise
+FALSE,vpminq_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point minimum pairwise
+TRUE,vpminq_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point minimum pairwise
+TRUE,vpminq_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point minimum pairwise
+TRUE,vpminq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Signed minimum pairwise
+TRUE,vpminq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Signed minimum pairwise
+TRUE,vpminq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Signed minimum pairwise
+TRUE,vpminq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Unsigned minimum pairwise
+TRUE,vpminq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Unsigned minimum pairwise
+TRUE,vpminq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Unsigned minimum pairwise
+FALSE,vpminqd_f64,a: float64x2_t,float64_t,Floating-point minimum pairwise
+FALSE,vpmins_f32,a: float32x2_t,f32,Floating-point minimum pairwise
+TRUE,vqabs_s16,a: int16x4_t,int16x4_t,Signed saturating absolute value
+TRUE,vqabs_s32,a: int32x2_t,int32x2_t,Signed saturating absolute value
+TRUE,vqabs_s64,a: int64x1_t,int64x1_t,Signed saturating absolute value
+TRUE,vqabs_s8,a: int8x8_t,int8x8_t,Signed saturating absolute value
+FALSE,vqabsb_s8,a: i8,i8,Signed saturating absolute value
+FALSE,vqabsd_s64,a: i64,i64,Signed saturating absolute value
+FALSE,vqabsh_s16,a: i16,i16,Signed saturating absolute value
+TRUE,vqabsq_s16,a: int16x8_t,int16x8_t,Signed saturating absolute value
+TRUE,vqabsq_s32,a: int32x4_t,int32x4_t,Signed saturating absolute value
+TRUE,vqabsq_s64,a: int64x2_t,int64x2_t,Signed saturating absolute value
+TRUE,vqabsq_s8,a: int8x16_t,int8x16_t,Signed saturating absolute value
+FALSE,vqabss_s32,a: i32,i32,Signed saturating absolute value
+TRUE,vqadd_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Signed saturating add
+TRUE,vqadd_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Signed saturating add
+TRUE,vqadd_s64,"a: int64x1_t, b: int64x1_t",int64x1_t,Signed saturating add
+TRUE,vqadd_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Signed saturating add
+TRUE,vqadd_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Unsigned saturating add
+TRUE,vqadd_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Unsigned saturating add
+TRUE,vqadd_u64,"a: uint64x1_t, b: uint64x1_t",uint64x1_t,Unsigned saturating add
+TRUE,vqadd_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Unsigned saturating add
+TRUE,vqaddb_s8,"a: i8, b: i8",i8,Signed saturating add
+TRUE,vqaddb_u8,"a: u8, b: u8",u8,Unsigned saturating add
+TRUE,vqaddd_s64,"a: i64, b: i64",i64,Signed saturating add
+TRUE,vqaddd_u64,"a: u64, b: u64",u64,Unsigned saturating add
+TRUE,vqaddh_s16,"a: i16, b: i16",i16,Signed saturating add
+TRUE,vqaddh_u16,"a: u16, b: u16",u16,Unsigned saturating add
+TRUE,vqaddq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Signed saturating add
+TRUE,vqaddq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Signed saturating add
+TRUE,vqaddq_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,Signed saturating add
+TRUE,vqaddq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Signed saturating add
+TRUE,vqaddq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Unsigned saturating add
+TRUE,vqaddq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Unsigned saturating add
+TRUE,vqaddq_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Unsigned saturating add
+TRUE,vqaddq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Unsigned saturating add
+TRUE,vqadds_s32,"a: i32, b: i32",i32,Signed saturating add
+TRUE,vqadds_u32,"a: u32, b: u32",u32,Unsigned saturating add
+TRUE,vqdmlal_high_lane_s16,"a: int32x4_t, b: int16x8_t, v: int16x4_t, lane: const int",int32x4_t,Signed saturating doubling multiply-add long
+TRUE,vqdmlal_high_lane_s32,"a: int64x2_t, b: int32x4_t, v: int32x2_t, lane: const int",int64x2_t,Signed saturating doubling multiply-add long
+TRUE,vqdmlal_high_laneq_s16,"a: int32x4_t, b: int16x8_t, v: int16x8_t, lane: const int",int32x4_t,Signed saturating doubling multiply-add long
+TRUE,vqdmlal_high_laneq_s32,"a: int64x2_t, b: int32x4_t, v: int32x4_t, lane: const int",int64x2_t,Signed saturating doubling multiply-add long
+TRUE,vqdmlal_high_n_s16,"a: int32x4_t, b: int16x8_t, c: i16",int32x4_t,Signed saturating doubling multiply-add long
+TRUE,vqdmlal_high_n_s32,"a: int64x2_t, b: int32x4_t, c: i32",int64x2_t,Signed saturating doubling multiply-add long
+TRUE,vqdmlal_high_s16,"a: int32x4_t, b: int16x8_t, c: int16x8_t",int32x4_t,Signed saturating doubling multiply-add long
+TRUE,vqdmlal_high_s32,"a: int64x2_t, b: int32x4_t, c: int32x4_t",int64x2_t,Signed saturating doubling multiply-add long
+TRUE,vqdmlal_lane_s16,"a: int32x4_t, b: int16x4_t, v: int16x4_t, lane: const int",int32x4_t,Vector widening saturating doubling multiply accumulate with scalar
+TRUE,vqdmlal_lane_s32,"a: int64x2_t, b: int32x2_t, v: int32x2_t, lane: const int",int64x2_t,Vector widening saturating doubling multiply accumulate with scalar
+TRUE,vqdmlal_laneq_s16,"a: int32x4_t, b: int16x4_t, v: int16x8_t, lane: const int",int32x4_t,Signed saturating doubling multiply-add long
+TRUE,vqdmlal_laneq_s32,"a: int64x2_t, b: int32x2_t, v: int32x4_t, lane: const int",int64x2_t,Signed saturating doubling multiply-add long
+TRUE,vqdmlal_n_s16,"a: int32x4_t, b: int16x4_t, c: i16",int32x4_t,Vector widening saturating doubling multiply accumulate with scalar
+TRUE,vqdmlal_n_s32,"a: int64x2_t, b: int32x2_t, c: i32",int64x2_t,Vector widening saturating doubling multiply accumulate with scalar
+TRUE,vqdmlal_s16,"a: int32x4_t, b: int16x4_t, c: int16x4_t",int32x4_t,Signed saturating doubling multiply-add long
+TRUE,vqdmlal_s32,"a: int64x2_t, b: int32x2_t, c: int32x2_t",int64x2_t,Signed saturating doubling multiply-add long
+FALSE,vqdmlalh_lane_s16,"a: i32, b: i16, v: int16x4_t, lane: const int",i32,Signed saturating doubling multiply-add long
+FALSE,vqdmlalh_laneq_s16,"a: i32, b: i16, v: int16x8_t, lane: const int",i32,Signed saturating doubling multiply-add long
+FALSE,vqdmlalh_s16,"a: i32, b: i16, c: i16",i32,Signed saturating doubling multiply-add long
+FALSE,vqdmlals_lane_s32,"a: i64, b: i32, v: int32x2_t, lane: const int",i64,Signed saturating doubling multiply-add long
+FALSE,vqdmlals_laneq_s32,"a: i64, b: i32, v: int32x4_t, lane: const int",i64,Signed saturating doubling multiply-add long
+FALSE,vqdmlals_s32,"a: i64, b: i32, c: i32",i64,Signed saturating doubling multiply-add long
+TRUE,vqdmlsl_high_lane_s16,"a: int32x4_t, b: int16x8_t, v: int16x4_t, lane: const int",int32x4_t,Signed saturating doubling multiply-subtract long
+TRUE,vqdmlsl_high_lane_s32,"a: int64x2_t, b: int32x4_t, v: int32x2_t, lane: const int",int64x2_t,Signed saturating doubling multiply-subtract long
+TRUE,vqdmlsl_high_laneq_s16,"a: int32x4_t, b: int16x8_t, v: int16x8_t, lane: const int",int32x4_t,Signed saturating doubling multiply-subtract long
+TRUE,vqdmlsl_high_laneq_s32,"a: int64x2_t, b: int32x4_t, v: int32x4_t, lane: const int",int64x2_t,Signed saturating doubling multiply-subtract long
+TRUE,vqdmlsl_high_n_s16,"a: int32x4_t, b: int16x8_t, c: i16",int32x4_t,Signed saturating doubling multiply-subtract long
+TRUE,vqdmlsl_high_n_s32,"a: int64x2_t, b: int32x4_t, c: i32",int64x2_t,Signed saturating doubling multiply-subtract long
+TRUE,vqdmlsl_high_s16,"a: int32x4_t, b: int16x8_t, c: int16x8_t",int32x4_t,Signed saturating doubling multiply-subtract long
+TRUE,vqdmlsl_high_s32,"a: int64x2_t, b: int32x4_t, c: int32x4_t",int64x2_t,Signed saturating doubling multiply-subtract long
+TRUE,vqdmlsl_lane_s16,"a: int32x4_t, b: int16x4_t, v: int16x4_t, lane: const int",int32x4_t,Vector widening saturating doubling multiply subtract with scalar
+TRUE,vqdmlsl_lane_s32,"a: int64x2_t, b: int32x2_t, v: int32x2_t, lane: const int",int64x2_t,Vector widening saturating doubling multiply subtract with scalar
+TRUE,vqdmlsl_laneq_s16,"a: int32x4_t, b: int16x4_t, v: int16x8_t, lane: const int",int32x4_t,Signed saturating doubling multiply-subtract long
+TRUE,vqdmlsl_laneq_s32,"a: int64x2_t, b: int32x2_t, v: int32x4_t, lane: const int",int64x2_t,Signed saturating doubling multiply-subtract long
+TRUE,vqdmlsl_n_s16,"a: int32x4_t, b: int16x4_t, c: i16",int32x4_t,Vector widening saturating doubling multiply subtract with scalar
+TRUE,vqdmlsl_n_s32,"a: int64x2_t, b: int32x2_t, c: i32",int64x2_t,Vector widening saturating doubling multiply subtract with scalar
+TRUE,vqdmlsl_s16,"a: int32x4_t, b: int16x4_t, c: int16x4_t",int32x4_t,Signed saturating doubling multiply-subtract long
+TRUE,vqdmlsl_s32,"a: int64x2_t, b: int32x2_t, c: int32x2_t",int64x2_t,Signed saturating doubling multiply-subtract long
+FALSE,vqdmlslh_lane_s16,"a: i32, b: i16, v: int16x4_t, lane: const int",i32,Signed saturating doubling multiply-subtract long
+FALSE,vqdmlslh_laneq_s16,"a: i32, b: i16, v: int16x8_t, lane: const int",i32,Signed saturating doubling multiply-subtract long
+FALSE,vqdmlslh_s16,"a: i32, b: i16, c: i16",i32,Signed saturating doubling multiply-subtract long
+FALSE,vqdmlsls_lane_s32,"a: i64, b: i32, v: int32x2_t, lane: const int",i64,Signed saturating doubling multiply-subtract long
+FALSE,vqdmlsls_laneq_s32,"a: i64, b: i32, v: int32x4_t, lane: const int",i64,Signed saturating doubling multiply-subtract long
+FALSE,vqdmlsls_s32,"a: i64, b: i32, c: i32",i64,Signed saturating doubling multiply-subtract long
+FALSE,vqdmulh_lane_s16,"a: int16x4_t, v: int16x4_t, lane: const int",int16x4_t,Vector saturating doubling multiply high by scalar
+FALSE,vqdmulh_lane_s32,"a: int32x2_t, v: int32x2_t, lane: const int",int32x2_t,Vector saturating doubling multiply high by scalar
+FALSE,vqdmulh_laneq_s16,"a: int16x4_t, v: int16x8_t, lane: const int",int16x4_t,Signed saturating doubling multiply returning high half
+FALSE,vqdmulh_laneq_s32,"a: int32x2_t, v: int32x4_t, lane: const int",int32x2_t,Signed saturating doubling multiply returning high half
+TRUE,vqdmulh_n_s16,"a: int16x4_t, b: i16",int16x4_t,Vector saturating doubling multiply high with scalar
+TRUE,vqdmulh_n_s32,"a: int32x2_t, b: i32",int32x2_t,Vector saturating doubling multiply high with scalar
+TRUE,vqdmulh_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Signed saturating doubling multiply returning high half
+TRUE,vqdmulh_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Signed saturating doubling multiply returning high half
+TRUE,vqdmulhh_lane_s16,"a: i16, v: int16x4_t, lane: const int",i16,Signed saturating doubling multiply returning high half
+TRUE,vqdmulhh_laneq_s16,"a: i16, v: int16x8_t, lane: const int",i16,Signed saturating doubling multiply returning high half
+TRUE,vqdmulhh_s16,"a: i16, b: i16",i16,Signed saturating doubling multiply returning high half
+FALSE,vqdmulhq_lane_s16,"a: int16x8_t, v: int16x4_t, lane: const int",int16x8_t,Vector saturating doubling multiply high by scalar
+FALSE,vqdmulhq_lane_s32,"a: int32x4_t, v: int32x2_t, lane: const int",int32x4_t,Vector saturating doubling multiply high by scalar
+FALSE,vqdmulhq_laneq_s16,"a: int16x8_t, v: int16x8_t, lane: const int",int16x8_t,Signed saturating doubling multiply returning high half
+FALSE,vqdmulhq_laneq_s32,"a: int32x4_t, v: int32x4_t, lane: const int",int32x4_t,Signed saturating doubling multiply returning high half
+FALSE,vqdmulhq_n_s16,"a: int16x8_t, b: i16",int16x8_t,Vector saturating doubling multiply high with scalar
+FALSE,vqdmulhq_n_s32,"a: int32x4_t, b: i32",int32x4_t,Vector saturating doubling multiply high with scalar
+FALSE,vqdmulhq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Signed saturating doubling multiply returning high half
+TRUE,vqdmulhq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Signed saturating doubling multiply returning high half
+TRUE,vqdmulhs_lane_s32,"a: i32, v: int32x2_t, lane: const int",i32,Signed saturating doubling multiply returning high half
+TRUE,vqdmulhs_laneq_s32,"a: i32, v: int32x4_t, lane: const int",i32,Signed saturating doubling multiply returning high half
+TRUE,vqdmulhs_s32,"a: i32, b: i32",i32,Signed saturating doubling multiply returning high half
+TRUE,vqdmull_high_lane_s16,"a: int16x8_t, v: int16x4_t, lane: const int",int32x4_t,Signed saturating doubling multiply long
+TRUE,vqdmull_high_lane_s32,"a: int32x4_t, v: int32x2_t, lane: const int",int64x2_t,Signed saturating doubling multiply long
+TRUE,vqdmull_high_laneq_s16,"a: int16x8_t, v: int16x8_t, lane: const int",int32x4_t,Signed saturating doubling multiply long
+TRUE,vqdmull_high_laneq_s32,"a: int32x4_t, v: int32x4_t, lane: const int",int64x2_t,Signed saturating doubling multiply long
+TRUE,vqdmull_high_n_s16,"a: int16x8_t, b: i16",int32x4_t,Signed saturating doubling multiply long
+TRUE,vqdmull_high_n_s32,"a: int32x4_t, b: i32",int64x2_t,Signed saturating doubling multiply long
+TRUE,vqdmull_high_s16,"a: int16x8_t, b: int16x8_t",int32x4_t,Signed saturating doubling multiply long
+TRUE,vqdmull_high_s32,"a: int32x4_t, b: int32x4_t",int64x2_t,Signed saturating doubling multiply long
+TRUE,vqdmull_lane_s16,"a: int16x4_t, v: int16x4_t, lane: const int",int32x4_t,Vector saturating doubling long multiply by scalar
+TRUE,vqdmull_lane_s32,"a: int32x2_t, v: int32x2_t, lane: const int",int64x2_t,Vector saturating doubling long multiply by scalar
+TRUE,vqdmull_laneq_s16,"a: int16x4_t, v: int16x8_t, lane: const int",int32x4_t,Signed saturating doubling multiply long
+TRUE,vqdmull_laneq_s32,"a: int32x2_t, v: int32x4_t, lane: const int",int64x2_t,Signed saturating doubling multiply long
+TRUE,vqdmull_n_s16,"a: int16x4_t, b: i16",int32x4_t,Vector saturating doubling long multiply with scalar
+TRUE,vqdmull_n_s32,"a: int32x2_t, b: i32",int64x2_t,Vector saturating doubling long multiply with scalar
+TRUE,vqdmull_s16,"a: int16x4_t, b: int16x4_t",int32x4_t,Signed saturating doubling multiply long
+TRUE,vqdmull_s32,"a: int32x2_t, b: int32x2_t",int64x2_t,Signed saturating doubling multiply long
+TRUE,vqdmullh_lane_s16,"a: i16, v: int16x4_t, lane: const int",i32,Signed saturating doubling multiply long
+TRUE,vqdmullh_laneq_s16,"a: i16, v: int16x8_t, lane: const int",i32,Signed saturating doubling multiply long
+TRUE,vqdmullh_s16,"a: i16, b: i16",i32,Signed saturating doubling multiply long
+TRUE,vqdmulls_lane_s32,"a: i32, v: int32x2_t, lane: const int",i64,Signed saturating doubling multiply long
+TRUE,vqdmulls_laneq_s32,"a: i32, v: int32x4_t, lane: const int",i64,Signed saturating doubling multiply long
+TRUE,vqdmulls_s32,"a: i32, b: i32",i64,Signed saturating doubling multiply long
+FALSE,vqmovn_high_s16,"r: int8x8_t, a: int16x8_t",int8x16_t,Signed saturating extract narrow
+FALSE,vqmovn_high_s32,"r: int16x4_t, a: int32x4_t",int16x8_t,Signed saturating extract narrow
+FALSE,vqmovn_high_s64,"r: int32x2_t, a: int64x2_t",int32x4_t,Signed saturating extract narrow
+FALSE,vqmovn_high_u16,"r: uint8x8_t, a: uint16x8_t",uint8x16_t,Unsigned saturating extract narrow
+FALSE,vqmovn_high_u32,"r: uint16x4_t, a: uint32x4_t",uint16x8_t,Unsigned saturating extract narrow
+FALSE,vqmovn_high_u64,"r: uint32x2_t, a: uint64x2_t",uint32x4_t,Unsigned saturating extract narrow
+FALSE,vqmovn_s16,a: int16x8_t,int8x8_t,Signed saturating extract narrow
+FALSE,vqmovn_s32,a: int32x4_t,int16x4_t,Signed saturating extract narrow
+FALSE,vqmovn_s64,a: int64x2_t,int32x2_t,Signed saturating extract narrow
+FALSE,vqmovn_u16,a: uint16x8_t,uint8x8_t,Unsigned saturating extract narrow
+FALSE,vqmovn_u32,a: uint32x4_t,uint16x4_t,Unsigned saturating extract narrow
+TRUE,vqmovn_u64,a: uint64x2_t,uint32x2_t,Unsigned saturating extract narrow
+FALSE,vqmovnd_s64,a: i64,i32,Signed saturating extract narrow
+FALSE,vqmovnd_u64,a: u64,u32,Unsigned saturating extract narrow
+FALSE,vqmovnh_s16,a: i16,i8,Signed saturating extract narrow
+FALSE,vqmovnh_u16,a: u16,u8,Unsigned saturating extract narrow
+FALSE,vqmovns_s32,a: i32,i16,Signed saturating extract narrow
+FALSE,vqmovns_u32,a: u32,u16,Unsigned saturating extract narrow
+FALSE,vqmovun_high_s16,"r: uint8x8_t, a: int16x8_t",uint8x16_t,Signed saturating extract unsigned narrow
+FALSE,vqmovun_high_s32,"r: uint16x4_t, a: int32x4_t",uint16x8_t,Signed saturating extract unsigned narrow
+FALSE,vqmovun_high_s64,"r: uint32x2_t, a: int64x2_t",uint32x4_t,Signed saturating extract unsigned narrow
+FALSE,vqmovun_s16,a: int16x8_t,uint8x8_t,Signed saturating extract unsigned narrow
+FALSE,vqmovun_s32,a: int32x4_t,uint16x4_t,Signed saturating extract unsigned narrow
+FALSE,vqmovun_s64,a: int64x2_t,uint32x2_t,Signed saturating extract unsigned narrow
+FALSE,vqmovund_s64,a: i64,u32,Signed saturating extract unsigned narrow
+FALSE,vqmovunh_s16,a: i16,u8,Signed saturating extract unsigned narrow
+FALSE,vqmovuns_s32,a: i32,u16,Signed saturating extract unsigned narrow
+TRUE,vqneg_s16,a: int16x4_t,int16x4_t,Signed saturating negate
+TRUE,vqneg_s32,a: int32x2_t,int32x2_t,Signed saturating negate
+TRUE,vqneg_s64,a: int64x1_t,int64x1_t,Signed saturating negate
+TRUE,vqneg_s8,a: int8x8_t,int8x8_t,Signed saturating negate
+FALSE,vqnegb_s8,a: i8,i8,Signed saturating negate
+FALSE,vqnegd_s64,a: i64,i64,Signed saturating negate
+FALSE,vqnegh_s16,a: i16,i16,Signed saturating negate
+TRUE,vqnegq_s16,a: int16x8_t,int16x8_t,Signed saturating negate
+TRUE,vqnegq_s32,a: int32x4_t,int32x4_t,Signed saturating negate
+TRUE,vqnegq_s64,a: int64x2_t,int64x2_t,Signed saturating negate
+TRUE,vqnegq_s8,a: int8x16_t,int8x16_t,Signed saturating negate
+FALSE,vqnegs_s32,a: i32,i32,Signed saturating negate
+FALSE,vqrdmlah_lane_s16,"a: int16x4_t, b: int16x4_t, v: int16x4_t, lane: const int",int16x4_t,Signed saturating rounding doubling multiply accumulate returning high half
+FALSE,vqrdmlah_lane_s32,"a: int32x2_t, b: int32x2_t, v: int32x2_t, lane: const int",int32x2_t,Signed saturating rounding doubling multiply accumulate returning high half
+FALSE,vqrdmlah_laneq_s16,"a: int16x4_t, b: int16x4_t, v: int16x8_t, lane: const int",int16x4_t,Signed saturating rounding doubling multiply accumulate returning high half
+FALSE,vqrdmlah_laneq_s32,"a: int32x2_t, b: int32x2_t, v: int32x4_t, lane: const int",int32x2_t,Signed saturating rounding doubling multiply accumulate returning high half
+FALSE,vqrdmlah_s16,"a: int16x4_t, b: int16x4_t, c: int16x4_t",int16x4_t,Signed saturating rounding doubling multiply accumulate returning high half
+FALSE,vqrdmlah_s32,"a: int32x2_t, b: int32x2_t, c: int32x2_t",int32x2_t,Signed saturating rounding doubling multiply accumulate returning high half
+FALSE,vqrdmlahh_lane_s16,"a: i16, b: i16, v: int16x4_t, lane: const int",i16,Signed saturating rounding doubling multiply accumulate returning high half
+FALSE,vqrdmlahh_laneq_s16,"a: i16, b: i16, v: int16x8_t, lane: const int",i16,Signed saturating rounding doubling multiply accumulate returning high half
+FALSE,vqrdmlahh_s16,"a: i16, b: i16, c: i16",i16,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmlahq_lane_s16,"a: int16x8_t, b: int16x8_t, v: int16x4_t, lane: const int",int16x8_t,Signed saturating rounding doubling multiply accumulate returning high half
+FALSE,vqrdmlahq_lane_s32,"a: int32x4_t, b: int32x4_t, v: int32x2_t, lane: const int",int32x4_t,Signed saturating rounding doubling multiply accumulate returning high half
+FALSE,vqrdmlahq_laneq_s16,"a: int16x8_t, b: int16x8_t, v: int16x8_t, lane: const int",int16x8_t,Signed saturating rounding doubling multiply accumulate returning high half
+FALSE,vqrdmlahq_laneq_s32,"a: int32x4_t, b: int32x4_t, v: int32x4_t, lane: const int",int32x4_t,Signed saturating rounding doubling multiply accumulate returning high half
+FALSE,vqrdmlahq_s16,"a: int16x8_t, b: int16x8_t, c: int16x8_t",int16x8_t,Signed saturating rounding doubling multiply accumulate returning high half
+FALSE,vqrdmlahq_s32,"a: int32x4_t, b: int32x4_t, c: int32x4_t",int32x4_t,Signed saturating rounding doubling multiply accumulate returning high half
+FALSE,vqrdmlahs_lane_s32,"a: i32, b: i32, v: int32x4_t, lane: const int",i32,Signed saturating rounding doubling multiply accumulate returning high half
+FALSE,vqrdmlahs_laneq_s32,"a: i32, b: i32, v: int32x8_t, lane: const int",i32,Signed saturating rounding doubling multiply accumulate returning high half
+FALSE,vqrdmlahs_s32,"a: i32, b: i32, c: i32",i32,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmlsh_lane_s16,"a: int16x4_t, b: int16x4_t, v: int16x4_t, lane: const int",int16x4_t,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmlsh_lane_s32,"a: int32x2_t, b: int32x2_t, v: int32x2_t, lane: const int",int32x2_t,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmlsh_laneq_s16,"a: int16x4_t, b: int16x4_t, v: int16x8_t, lane: const int",int16x4_t,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmlsh_laneq_s32,"a: int32x2_t, b: int32x2_t, v: int32x4_t, lane: const int",int32x2_t,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmlsh_s16,"a: int16x4_t, b: int16x4_t, c: int16x4_t",int16x4_t,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmlsh_s32,"a: int32x2_t, b: int32x2_t, c: int32x2_t",int32x2_t,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmlshh_lane_s16,"a: i16, b: i16, v: int16x4_t, lane: const int",i16,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmlshh_laneq_s16,"a: i16, b: i16, v: int16x8_t, lane: const int",i16,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmlshh_s16,"a: i16, b: i16, c: i16",i16,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmlshq_lane_s16,"a: int16x8_t, b: int16x8_t, v: int16x4_t, lane: const int",int16x8_t,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmlshq_lane_s32,"a: int32x4_t, b: int32x4_t, v: int32x2_t, lane: const int",int32x4_t,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmlshq_laneq_s16,"a: int16x8_t, b: int16x8_t, v: int16x8_t, lane: const int",int16x8_t,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmlshq_laneq_s32,"a: int32x4_t, b: int32x4_t, v: int32x4_t, lane: const int",int32x4_t,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmlshq_s16,"a: int16x8_t, b: int16x8_t, c: int16x8_t",int16x8_t,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmlshq_s32,"a: int32x4_t, b: int32x4_t, c: int32x4_t",int32x4_t,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmlshs_lane_s32,"a: i32, b: i32, v: int32x4_t, lane: const int",i32,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmlshs_laneq_s32,"a: i32, b: i32, v: int32x8_t, lane: const int",i32,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmlshs_s32,"a: i32, b: i32, c: i32",i32,Signed saturating rounding doubling multiply subtract returning high half
+FALSE,vqrdmulh_lane_s16,"a: int16x4_t, v: int16x4_t, lane: const int",int16x4_t,Vector rounding saturating doubling multiply high by scalar
+FALSE,vqrdmulh_lane_s32,"a: int32x2_t, v: int32x2_t, lane: const int",int32x2_t,Vector rounding saturating doubling multiply high by scalar
+FALSE,vqrdmulh_laneq_s16,"a: int16x4_t, v: int16x8_t, lane: const int",int16x4_t,Signed saturating rounding doubling multiply returning high half
+FALSE,vqrdmulh_laneq_s32,"a: int32x2_t, v: int32x4_t, lane: const int",int32x2_t,Signed saturating rounding doubling multiply returning high half
+FALSE,vqrdmulh_n_s16,"a: int16x4_t, b: i16",int16x4_t,Vector saturating rounding doubling multiply high with scalar
+FALSE,vqrdmulh_n_s32,"a: int32x2_t, b: i32",int32x2_t,Vector saturating rounding doubling multiply high with scalar
+FALSE,vqrdmulh_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Signed saturating rounding doubling multiply returning high half
+FALSE,vqrdmulh_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Signed saturating rounding doubling multiply returning high half
+FALSE,vqrdmulhh_lane_s16,"a: i16, v: int16x4_t, lane: const int",i16,Signed saturating rounding doubling multiply returning high half
+FALSE,vqrdmulhh_laneq_s16,"a: i16, v: int16x8_t, lane: const int",i16,Signed saturating rounding doubling multiply returning high half
+FALSE,vqrdmulhh_s16,"a: i16, b: i16",i16,Signed saturating rounding doubling multiply returning high half
+FALSE,vqrdmulhq_lane_s16,"a: int16x8_t, v: int16x4_t, lane: const int",int16x8_t,Vector rounding saturating doubling multiply high by scalar
+FALSE,vqrdmulhq_lane_s32,"a: int32x4_t, v: int32x2_t, lane: const int",int32x4_t,Vector rounding saturating doubling multiply high by scalar
+FALSE,vqrdmulhq_laneq_s16,"a: int16x8_t, v: int16x8_t, lane: const int",int16x8_t,Signed saturating rounding doubling multiply returning high half
+FALSE,vqrdmulhq_laneq_s32,"a: int32x4_t, v: int32x4_t, lane: const int",int32x4_t,Signed saturating rounding doubling multiply returning high half
+FALSE,vqrdmulhq_n_s16,"a: int16x8_t, b: i16",int16x8_t,Vector saturating rounding doubling multiply high with scalar
+FALSE,vqrdmulhq_n_s32,"a: int32x4_t, b: i32",int32x4_t,Vector saturating rounding doubling multiply high with scalar
+FALSE,vqrdmulhq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Signed saturating rounding doubling multiply returning high half
+FALSE,vqrdmulhq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Signed saturating rounding doubling multiply returning high half
+FALSE,vqrdmulhs_lane_s32,"a: i32, v: int32x2_t, lane: const int",i32,Signed saturating rounding doubling multiply returning high half
+FALSE,vqrdmulhs_laneq_s32,"a: i32, v: int32x4_t, lane: const int",i32,Signed saturating rounding doubling multiply returning high half
+FALSE,vqrdmulhs_s32,"a: i32, b: i32",i32,Signed saturating rounding doubling multiply returning high half
+TRUE,vqrshl_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Signed saturating rounding shift left
+TRUE,vqrshl_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Signed saturating rounding shift left
+TRUE,vqrshl_s64,"a: int64x1_t, b: int64x1_t",int64x1_t,Signed saturating rounding shift left
+TRUE,vqrshl_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Signed saturating rounding shift left
+TRUE,vqrshl_u16,"a: uint16x4_t, b: int16x4_t",uint16x4_t,Unsigned saturating rounding shift left
+TRUE,vqrshl_u32,"a: uint32x2_t, b: int32x2_t",uint32x2_t,Unsigned saturating rounding shift left
+TRUE,vqrshl_u64,"a: uint64x1_t, b: int64x1_t",uint64x1_t,Unsigned saturating rounding shift left
+TRUE,vqrshl_u8,"a: uint8x8_t, b: int8x8_t",uint8x8_t,Unsigned saturating rounding shift left
+TRUE,vqrshlb_s8,"a: i8, b: i8",i8,Signed saturating rounding shift left
+TRUE,vqrshlb_u8,"a: u8, b: i8",u8,Unsigned saturating rounding shift left
+TRUE,vqrshld_s64,"a: i64, b: i64",i64,Signed saturating rounding shift left
+TRUE,vqrshld_u64,"a: u64, b: i64",u64,Unsigned saturating rounding shift left
+TRUE,vqrshlh_s16,"a: i16, b: i16",i16,Signed saturating rounding shift left
+TRUE,vqrshlh_u16,"a: u16, b: i16",u16,Unsigned saturating rounding shift left
+TRUE,vqrshlq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Signed saturating rounding shift left
+TRUE,vqrshlq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Signed saturating rounding shift left
+TRUE,vqrshlq_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,Signed saturating rounding shift left
+TRUE,vqrshlq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Signed saturating rounding shift left
+TRUE,vqrshlq_u16,"a: uint16x8_t, b: int16x8_t",uint16x8_t,Unsigned saturating rounding shift left
+TRUE,vqrshlq_u32,"a: uint32x4_t, b: int32x4_t",uint32x4_t,Unsigned saturating rounding shift left
+TRUE,vqrshlq_u64,"a: uint64x2_t, b: int64x2_t",uint64x2_t,Unsigned saturating rounding shift left
+TRUE,vqrshlq_u8,"a: uint8x16_t, b: int8x16_t",uint8x16_t,Unsigned saturating rounding shift left
+TRUE,vqrshls_s32,"a: i32, b: i32",i32,Signed saturating rounding shift left
+TRUE,vqrshls_u32,"a: u32, b: i32",u32,Unsigned saturating rounding shift left
+TRUE,vqrshrn_high_n_s16,"r: int8x8_t, a: int16x8_t, n: const int",int8x16_t,Signed saturating rounded shift right narrow
+TRUE,vqrshrn_high_n_s32,"r: int16x4_t, a: int32x4_t, n: const int",int16x8_t,Signed saturating rounded shift right narrow
+TRUE,vqrshrn_high_n_s64,"r: int32x2_t, a: int64x2_t, n: const int",int32x4_t,Signed saturating rounded shift right narrow
+TRUE,vqrshrn_high_n_u16,"r: uint8x8_t, a: uint16x8_t, n: const int",uint8x16_t,Unsigned saturating rounded shift right narrow
+TRUE,vqrshrn_high_n_u32,"r: uint16x4_t, a: uint32x4_t, n: const int",uint16x8_t,Unsigned saturating rounded shift right narrow
+TRUE,vqrshrn_high_n_u64,"r: uint32x2_t, a: uint64x2_t, n: const int",uint32x4_t,Unsigned saturating rounded shift right narrow
+TRUE,vqrshrn_n_s16,"a: int16x8_t, n: const int",int8x8_t,Signed saturating rounded shift right narrow
+TRUE,vqrshrn_n_s32,"a: int32x4_t, n: const int",int16x4_t,Signed saturating rounded shift right narrow
+TRUE,vqrshrn_n_s64,"a: int64x2_t, n: const int",int32x2_t,Signed saturating rounded shift right narrow
+TRUE,vqrshrn_n_u16,"a: uint16x8_t, n: const int",uint8x8_t,Unsigned saturating rounded shift right narrow
+TRUE,vqrshrn_n_u32,"a: uint32x4_t, n: const int",uint16x4_t,Unsigned saturating rounded shift right narrow
+TRUE,vqrshrn_n_u64,"a: uint64x2_t, n: const int",uint32x2_t,Unsigned saturating rounded shift right narrow
+TRUE,vqrshrnd_n_s64,"a: i64, n: const int",i32,Signed saturating rounded shift right narrow
+TRUE,vqrshrnd_n_u64,"a: u64, n: const int",u32,Unsigned saturating rounded shift right narrow
+TRUE,vqrshrnh_n_s16,"a: i16, n: const int",i8,Signed saturating rounded shift right narrow
+TRUE,vqrshrnh_n_u16,"a: u16, n: const int",u8,Unsigned saturating rounded shift right narrow
+TRUE,vqrshrns_n_s32,"a: i32, n: const int",i16,Signed saturating rounded shift right narrow
+TRUE,vqrshrns_n_u32,"a: u32, n: const int",u16,Unsigned saturating rounded shift right narrow
+TRUE,vqrshrun_high_n_s16,"r: uint8x8_t, a: int16x8_t, n: const int",uint8x16_t,Signed saturating rounded shift right unsigned narrow
+TRUE,vqrshrun_high_n_s32,"r: uint16x4_t, a: int32x4_t, n: const int",uint16x8_t,Signed saturating rounded shift right unsigned narrow
+TRUE,vqrshrun_high_n_s64,"r: uint32x2_t, a: int64x2_t, n: const int",uint32x4_t,Signed saturating rounded shift right unsigned narrow
+TRUE,vqrshrun_n_s16,"a: int16x8_t, n: const int",uint8x8_t,Signed saturating rounded shift right unsigned narrow
+TRUE,vqrshrun_n_s32,"a: int32x4_t, n: const int",uint16x4_t,Signed saturating rounded shift right unsigned narrow
+TRUE,vqrshrun_n_s64,"a: int64x2_t, n: const int",uint32x2_t,Signed saturating rounded shift right unsigned narrow
+TRUE,vqrshrund_n_s64,"a: i64, n: const int",u32,Signed saturating rounded shift right unsigned narrow
+TRUE,vqrshrunh_n_s16,"a: i16, n: const int",u8,Signed saturating rounded shift right unsigned narrow
+TRUE,vqrshruns_n_s32,"a: i32, n: const int",u16,Signed saturating rounded shift right unsigned narrow
+TRUE,vqshl_n_s16,"a: int16x4_t, n: const int",int16x4_t,Signed saturating shift left
+TRUE,vqshl_n_s32,"a: int32x2_t, n: const int",int32x2_t,Signed saturating shift left
+TRUE,vqshl_n_s64,"a: int64x1_t, n: const int",int64x1_t,Signed saturating shift left
+TRUE,vqshl_n_s8,"a: int8x8_t, n: const int",int8x8_t,Signed saturating shift left
+TRUE,vqshl_n_u16,"a: uint16x4_t, n: const int",uint16x4_t,Unsigned saturating shift left
+TRUE,vqshl_n_u32,"a: uint32x2_t, n: const int",uint32x2_t,Unsigned saturating shift left
+TRUE,vqshl_n_u64,"a: uint64x1_t, n: const int",uint64x1_t,Unsigned saturating shift left
+TRUE,vqshl_n_u8,"a: uint8x8_t, n: const int",uint8x8_t,Unsigned saturating shift left
+TRUE,vqshl_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Signed saturating shift left
+TRUE,vqshl_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Signed saturating shift left
+TRUE,vqshl_s64,"a: int64x1_t, b: int64x1_t",int64x1_t,Signed saturating shift left
+TRUE,vqshl_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Signed saturating shift left
+TRUE,vqshl_u16,"a: uint16x4_t, b: int16x4_t",uint16x4_t,Unsigned saturating shift left
+TRUE,vqshl_u32,"a: uint32x2_t, b: int32x2_t",uint32x2_t,Unsigned saturating shift left
+TRUE,vqshl_u64,"a: uint64x1_t, b: int64x1_t",uint64x1_t,Unsigned saturating shift left
+TRUE,vqshl_u8,"a: uint8x8_t, b: int8x8_t",uint8x8_t,Unsigned saturating shift left
+TRUE,vqshlb_n_s8,"a: i8, n: const int",i8,Signed saturating shift left
+TRUE,vqshlb_n_u8,"a: u8, n: const int",u8,Unsigned saturating shift left
+TRUE,vqshlb_s8,"a: i8, b: i8",i8,Signed saturating shift left
+TRUE,vqshlb_u8,"a: u8, b: i8",u8,Unsigned saturating shift left
+TRUE,vqshld_n_s64,"a: i64, n: const int",i64,Signed saturating shift left
+TRUE,vqshld_n_u64,"a: u64, n: const int",u64,Unsigned saturating shift left
+TRUE,vqshld_s64,"a: i64, b: i64",i64,Signed saturating shift left
+TRUE,vqshld_u64,"a: u64, b: i64",u64,Unsigned saturating shift left
+TRUE,vqshlh_n_s16,"a: i16, n: const int",i16,Signed saturating shift left
+TRUE,vqshlh_n_u16,"a: u16, n: const int",u16,Unsigned saturating shift left
+TRUE,vqshlh_s16,"a: i16, b: i16",i16,Signed saturating shift left
+TRUE,vqshlh_u16,"a: u16, b: i16",u16,Unsigned saturating shift left
+TRUE,vqshlq_n_s16,"a: int16x8_t, n: const int",int16x8_t,Signed saturating shift left
+TRUE,vqshlq_n_s32,"a: int32x4_t, n: const int",int32x4_t,Signed saturating shift left
+TRUE,vqshlq_n_s64,"a: int64x2_t, n: const int",int64x2_t,Signed saturating shift left
+TRUE,vqshlq_n_s8,"a: int8x16_t, n: const int",int8x16_t,Signed saturating shift left
+TRUE,vqshlq_n_u16,"a: uint16x8_t, n: const int",uint16x8_t,Unsigned saturating shift left
+TRUE,vqshlq_n_u32,"a: uint32x4_t, n: const int",uint32x4_t,Unsigned saturating shift left
+TRUE,vqshlq_n_u64,"a: uint64x2_t, n: const int",uint64x2_t,Unsigned saturating shift left
+TRUE,vqshlq_n_u8,"a: uint8x16_t, n: const int",uint8x16_t,Unsigned saturating shift left
+TRUE,vqshlq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Signed saturating shift left
+TRUE,vqshlq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Signed saturating shift left
+TRUE,vqshlq_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,Signed saturating shift left
+TRUE,vqshlq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Signed saturating shift left
+TRUE,vqshlq_u16,"a: uint16x8_t, b: int16x8_t",uint16x8_t,Unsigned saturating shift left
+TRUE,vqshlq_u32,"a: uint32x4_t, b: int32x4_t",uint32x4_t,Unsigned saturating shift left
+TRUE,vqshlq_u64,"a: uint64x2_t, b: int64x2_t",uint64x2_t,Unsigned saturating shift left
+TRUE,vqshlq_u8,"a: uint8x16_t, b: int8x16_t",uint8x16_t,Unsigned saturating shift left
+TRUE,vqshls_n_s32,"a: i32, n: const int",i32,Signed saturating shift left
+TRUE,vqshls_n_u32,"a: u32, n: const int",u32,Unsigned saturating shift left
+TRUE,vqshls_s32,"a: i32, b: i32",i32,Signed saturating shift left
+TRUE,vqshls_u32,"a: u32, b: i32",u32,Unsigned saturating shift left
+FALSE,vqshlu_n_s16,"a: int16x4_t, n: const int",uint16x4_t,Signed saturating shift left unsigned
+FALSE,vqshlu_n_s32,"a: int32x2_t, n: const int",uint32x2_t,Signed saturating shift left unsigned
+FALSE,vqshlu_n_s64,"a: int64x1_t, n: const int",uint64x1_t,Signed saturating shift left unsigned
+FALSE,vqshlu_n_s8,"a: int8x8_t, n: const int",uint8x8_t,Signed saturating shift left unsigned
+FALSE,vqshlub_n_s8,"a: i8, n: const int",u8,Signed saturating shift left unsigned
+FALSE,vqshlud_n_s64,"a: i64, n: const int",u64,Signed saturating shift left unsigned
+FALSE,vqshluh_n_s16,"a: i16, n: const int",u16,Signed saturating shift left unsigned
+FALSE,vqshluq_n_s16,"a: int16x8_t, n: const int",uint16x8_t,Signed saturating shift left unsigned
+FALSE,vqshluq_n_s32,"a: int32x4_t, n: const int",uint32x4_t,Signed saturating shift left unsigned
+FALSE,vqshluq_n_s64,"a: int64x2_t, n: const int",uint64x2_t,Signed saturating shift left unsigned
+FALSE,vqshluq_n_s8,"a: int8x16_t, n: const int",uint8x16_t,Signed saturating shift left unsigned
+FALSE,vqshlus_n_s32,"a: i32, n: const int",u32,Signed saturating shift left unsigned
+TRUE,vqshrn_high_n_s16,"r: int8x8_t, a: int16x8_t, n: const int",int8x16_t,Signed saturating shift right narrow
+TRUE,vqshrn_high_n_s32,"r: int16x4_t, a: int32x4_t, n: const int",int16x8_t,Signed saturating shift right narrow
+TRUE,vqshrn_high_n_s64,"r: int32x2_t, a: int64x2_t, n: const int",int32x4_t,Signed saturating shift right narrow
+TRUE,vqshrn_high_n_u16,"r: uint8x8_t, a: uint16x8_t, n: const int",uint8x16_t,Unsigned saturating shift right narrow
+TRUE,vqshrn_high_n_u32,"r: uint16x4_t, a: uint32x4_t, n: const int",uint16x8_t,Unsigned saturating shift right narrow
+TRUE,vqshrn_high_n_u64,"r: uint32x2_t, a: uint64x2_t, n: const int",uint32x4_t,Unsigned saturating shift right narrow
+TRUE,vqshrn_n_s16,"a: int16x8_t, n: const int",int8x8_t,Signed saturating shift right narrow
+TRUE,vqshrn_n_s32,"a: int32x4_t, n: const int",int16x4_t,Signed saturating shift right narrow
+TRUE,vqshrn_n_s64,"a: int64x2_t, n: const int",int32x2_t,Signed saturating shift right narrow
+TRUE,vqshrn_n_u16,"a: uint16x8_t, n: const int",uint8x8_t,Unsigned saturating shift right narrow
+TRUE,vqshrn_n_u32,"a: uint32x4_t, n: const int",uint16x4_t,Unsigned saturating shift right narrow
+TRUE,vqshrn_n_u64,"a: uint64x2_t, n: const int",uint32x2_t,Unsigned saturating shift right narrow
+TRUE,vqshrnd_n_s64,"a: i64, n: const int",i32,Signed saturating shift right narrow
+TRUE,vqshrnd_n_u64,"a: u64, n: const int",u32,Unsigned saturating shift right narrow
+TRUE,vqshrnh_n_s16,"a: i16, n: const int",i8,Signed saturating shift right narrow
+TRUE,vqshrnh_n_u16,"a: u16, n: const int",u8,Unsigned saturating shift right narrow
+TRUE,vqshrns_n_s32,"a: i32, n: const int",i16,Signed saturating shift right narrow
+TRUE,vqshrns_n_u32,"a: u32, n: const int",u16,Unsigned saturating shift right narrow
+TRUE,vqshrun_high_n_s16,"r: uint8x8_t, a: int16x8_t, n: const int",uint8x16_t,Signed saturating shift right unsigned narrow
+TRUE,vqshrun_high_n_s32,"r: uint16x4_t, a: int32x4_t, n: const int",uint16x8_t,Signed saturating shift right unsigned narrow
+TRUE,vqshrun_high_n_s64,"r: uint32x2_t, a: int64x2_t, n: const int",uint32x4_t,Signed saturating shift right unsigned narrow
+TRUE,vqshrun_n_s16,"a: int16x8_t, n: const int",uint8x8_t,Signed saturating shift right unsigned narrow
+TRUE,vqshrun_n_s32,"a: int32x4_t, n: const int",uint16x4_t,Signed saturating shift right unsigned narrow
+TRUE,vqshrun_n_s64,"a: int64x2_t, n: const int",uint32x2_t,Signed saturating shift right unsigned narrow
+TRUE,vqshrund_n_s64,"a: i64, n: const int",u32,Signed saturating shift right unsigned narrow
+TRUE,vqshrunh_n_s16,"a: i16, n: const int",u8,Signed saturating shift right unsigned narrow
+TRUE,vqshruns_n_s32,"a: i32, n: const int",u16,Signed saturating shift right unsigned narrow
+TRUE,vqsub_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Signed saturating subtract
+TRUE,vqsub_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Signed saturating subtract
+TRUE,vqsub_s64,"a: int64x1_t, b: int64x1_t",int64x1_t,Signed saturating subtract
+TRUE,vqsub_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Signed saturating subtract
+TRUE,vqsub_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Unsigned saturating subtract
+TRUE,vqsub_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Unsigned saturating subtract
+TRUE,vqsub_u64,"a: uint64x1_t, b: uint64x1_t",uint64x1_t,Unsigned saturating subtract
+TRUE,vqsub_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Unsigned saturating subtract
+TRUE,vqsubb_s8,"a: i8, b: i8",i8,Signed saturating subtract
+TRUE,vqsubb_u8,"a: u8, b: u8",u8,Unsigned saturating subtract
+TRUE,vqsubd_s64,"a: i64, b: i64",i64,Signed saturating subtract
+TRUE,vqsubd_u64,"a: u64, b: u64",u64,Unsigned saturating subtract
+TRUE,vqsubh_s16,"a: i16, b: i16",i16,Signed saturating subtract
+TRUE,vqsubh_u16,"a: u16, b: u16",u16,Unsigned saturating subtract
+TRUE,vqsubq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Signed saturating subtract
+TRUE,vqsubq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Signed saturating subtract
+TRUE,vqsubq_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,Signed saturating subtract
+TRUE,vqsubq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Signed saturating subtract
+TRUE,vqsubq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Unsigned saturating subtract
+TRUE,vqsubq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Unsigned saturating subtract
+TRUE,vqsubq_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Unsigned saturating subtract
+TRUE,vqsubq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Unsigned saturating subtract
+TRUE,vqsubs_s32,"a: i32, b: i32",i32,Signed saturating subtract
+TRUE,vqsubs_u32,"a: u32, b: u32",u32,Unsigned saturating subtract
+TRUE,vqtbl1_p8,"t: poly8x16_t, idx: uint8x8_t",poly8x8_t,Table vector lookup
+TRUE,vqtbl1_s8,"t: int8x16_t, idx: uint8x8_t",int8x8_t,Table vector lookup
+TRUE,vqtbl1_u8,"t: uint8x16_t, idx: uint8x8_t",uint8x8_t,Table vector lookup
+TRUE,vqtbl1q_p8,"t: poly8x16_t, idx: uint8x16_t",poly8x16_t,Table vector lookup
+TRUE,vqtbl1q_s8,"t: int8x16_t, idx: uint8x16_t",int8x16_t,Table vector lookup
+TRUE,vqtbl1q_u8,"t: uint8x16_t, idx: uint8x16_t",uint8x16_t,Table vector lookup
+TRUE,vqtbl2_p8,"t: poly8x16x2_t, idx: uint8x8_t",poly8x8_t,Table vector lookup
+TRUE,vqtbl2_s8,"t: int8x16x2_t, idx: uint8x8_t",int8x8_t,Table vector lookup
+TRUE,vqtbl2_u8,"t: uint8x16x2_t, idx: uint8x8_t",uint8x8_t,Table vector lookup
+TRUE,vqtbl2q_p8,"t: poly8x16x2_t, idx: uint8x16_t",poly8x16_t,Table vector lookup
+TRUE,vqtbl2q_s8,"t: int8x16x2_t, idx: uint8x16_t",int8x16_t,Table vector lookup
+TRUE,vqtbl2q_u8,"t: uint8x16x2_t, idx: uint8x16_t",uint8x16_t,Table vector lookup
+TRUE,vqtbl3_p8,"t: poly8x16x3_t, idx: uint8x8_t",poly8x8_t,Table vector lookup
+TRUE,vqtbl3_s8,"t: int8x16x3_t, idx: uint8x8_t",int8x8_t,Table vector lookup
+TRUE,vqtbl3_u8,"t: uint8x16x3_t, idx: uint8x8_t",uint8x8_t,Table vector lookup
+TRUE,vqtbl3q_p8,"t: poly8x16x3_t, idx: uint8x16_t",poly8x16_t,Table vector lookup
+TRUE,vqtbl3q_s8,"t: int8x16x3_t, idx: uint8x16_t",int8x16_t,Table vector lookup
+TRUE,vqtbl3q_u8,"t: uint8x16x3_t, idx: uint8x16_t",uint8x16_t,Table vector lookup
+TRUE,vqtbl4_p8,"t: poly8x16x4_t, idx: uint8x8_t",poly8x8_t,Table vector lookup
+TRUE,vqtbl4_s8,"t: int8x16x4_t, idx: uint8x8_t",int8x8_t,Table vector lookup
+TRUE,vqtbl4_u8,"t: uint8x16x4_t, idx: uint8x8_t",uint8x8_t,Table vector lookup
+TRUE,vqtbl4q_p8,"t: poly8x16x4_t, idx: uint8x16_t",poly8x16_t,Table vector lookup
+TRUE,vqtbl4q_s8,"t: int8x16x4_t, idx: uint8x16_t",int8x16_t,Table vector lookup
+TRUE,vqtbl4q_u8,"t: uint8x16x4_t, idx: uint8x16_t",uint8x16_t,Table vector lookup
+TRUE,vqtbx1_p8,"a: poly8x8_t, t: poly8x16_t, idx: uint8x8_t",poly8x8_t,Table vector lookup extension
+TRUE,vqtbx1_s8,"a: int8x8_t, t: int8x16_t, idx: uint8x8_t",int8x8_t,Table vector lookup extension
+TRUE,vqtbx1_u8,"a: uint8x8_t, t: uint8x16_t, idx: uint8x8_t",uint8x8_t,Table vector lookup extension
+TRUE,vqtbx1q_p8,"a: poly8x16_t, t: poly8x16_t, idx: uint8x16_t",poly8x16_t,Table vector lookup extension
+TRUE,vqtbx1q_s8,"a: int8x16_t, t: int8x16_t, idx: uint8x16_t",int8x16_t,Table vector lookup extension
+TRUE,vqtbx1q_u8,"a: uint8x16_t, t: uint8x16_t, idx: uint8x16_t",uint8x16_t,Table vector lookup extension
+TRUE,vqtbx2_p8,"a: poly8x8_t, t: poly8x16x2_t, idx: uint8x8_t",poly8x8_t,Table vector lookup extension
+TRUE,vqtbx2_s8,"a: int8x8_t, t: int8x16x2_t, idx: uint8x8_t",int8x8_t,Table vector lookup extension
+TRUE,vqtbx2_u8,"a: uint8x8_t, t: uint8x16x2_t, idx: uint8x8_t",uint8x8_t,Table vector lookup extension
+TRUE,vqtbx2q_p8,"a: poly8x16_t, t: poly8x16x2_t, idx: uint8x16_t",poly8x16_t,Table vector lookup extension
+TRUE,vqtbx2q_s8,"a: int8x16_t, t: int8x16x2_t, idx: uint8x16_t",int8x16_t,Table vector lookup extension
+TRUE,vqtbx2q_u8,"a: uint8x16_t, t: uint8x16x2_t, idx: uint8x16_t",uint8x16_t,Table vector lookup extension
+TRUE,vqtbx3_p8,"a: poly8x8_t, t: poly8x16x3_t, idx: uint8x8_t",poly8x8_t,Table vector lookup extension
+TRUE,vqtbx3_s8,"a: int8x8_t, t: int8x16x3_t, idx: uint8x8_t",int8x8_t,Table vector lookup extension
+TRUE,vqtbx3_u8,"a: uint8x8_t, t: uint8x16x3_t, idx: uint8x8_t",uint8x8_t,Table vector lookup extension
+TRUE,vqtbx3q_p8,"a: poly8x16_t, t: poly8x16x3_t, idx: uint8x16_t",poly8x16_t,Table vector lookup extension
+TRUE,vqtbx3q_s8,"a: int8x16_t, t: int8x16x3_t, idx: uint8x16_t",int8x16_t,Table vector lookup extension
+TRUE,vqtbx3q_u8,"a: uint8x16_t, t: uint8x16x3_t, idx: uint8x16_t",uint8x16_t,Table vector lookup extension
+TRUE,vqtbx4_p8,"a: poly8x8_t, t: poly8x16x4_t, idx: uint8x8_t",poly8x8_t,Table vector lookup extension
+TRUE,vqtbx4_s8,"a: int8x8_t, t: int8x16x4_t, idx: uint8x8_t",int8x8_t,Table vector lookup extension
+TRUE,vqtbx4_u8,"a: uint8x8_t, t: uint8x16x4_t, idx: uint8x8_t",uint8x8_t,Table vector lookup extension
+TRUE,vqtbx4q_p8,"a: poly8x16_t, t: poly8x16x4_t, idx: uint8x16_t",poly8x16_t,Table vector lookup extension
+TRUE,vqtbx4q_s8,"a: int8x16_t, t: int8x16x4_t, idx: uint8x16_t",int8x16_t,Table vector lookup extension
+TRUE,vqtbx4q_u8,"a: uint8x16_t, t: uint8x16x4_t, idx: uint8x16_t",uint8x16_t,Table vector lookup extension
+FALSE,vraddhn_high_s16,"r: int8x8_t, a: int16x8_t, b: int16x8_t",int8x16_t,Rounding add returning high narrow
+FALSE,vraddhn_high_s32,"r: int16x4_t, a: int32x4_t, b: int32x4_t",int16x8_t,Rounding add returning high narrow
+FALSE,vraddhn_high_s64,"r: int32x2_t, a: int64x2_t, b: int64x2_t",int32x4_t,Rounding add returning high narrow
+FALSE,vraddhn_high_u16,"r: uint8x8_t, a: uint16x8_t, b: uint16x8_t",uint8x16_t,Rounding add returning high narrow
+FALSE,vraddhn_high_u32,"r: uint16x4_t, a: uint32x4_t, b: uint32x4_t",uint16x8_t,Rounding add returning high narrow
+FALSE,vraddhn_high_u64,"r: uint32x2_t, a: uint64x2_t, b: uint64x2_t",uint32x4_t,Rounding add returning high narrow
+FALSE,vraddhn_s16,"a: int16x8_t, b: int16x8_t",int8x8_t,Rounding add returning high narrow
+FALSE,vraddhn_s32,"a: int32x4_t, b: int32x4_t",int16x4_t,Rounding add returning high narrow
+FALSE,vraddhn_s64,"a: int64x2_t, b: int64x2_t",int32x2_t,Rounding add returning high narrow
+FALSE,vraddhn_u16,"a: uint16x8_t, b: uint16x8_t",uint8x8_t,Rounding add returning high narrow
+FALSE,vraddhn_u32,"a: uint32x4_t, b: uint32x4_t",uint16x4_t,Rounding add returning high narrow
+FALSE,vraddhn_u64,"a: uint64x2_t, b: uint64x2_t",uint32x2_t,Rounding add returning high narrow
+FALSE,vrax1q_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Rotate and exclusive OR
+TRUE,vrbit_p8,a: poly8x8_t,poly8x8_t,Reverse bit order
+TRUE,vrbit_s8,a: int8x8_t,int8x8_t,Reverse bit order
+TRUE,vrbit_u8,a: uint8x8_t,uint8x8_t,Reverse bit order
+TRUE,vrbitq_p8,a: poly8x16_t,poly8x16_t,Reverse bit order
+TRUE,vrbitq_s8,a: int8x16_t,int8x16_t,Reverse bit order
+TRUE,vrbitq_u8,a: uint8x16_t,uint8x16_t,Reverse bit order
+FALSE,vrecpe_f16,a: float16x4_t,float16x4_t,Floating-point reciprocal estimate
+TRUE,vrecpe_f32,a: float32x2_t,float32x2_t,Floating-point reciprocal estimate
+TRUE,vrecpe_f64,a: float64x1_t,float64x1_t,Floating-point reciprocal estimate
+FALSE,vrecpe_u32,a: uint32x2_t,uint32x2_t,Unsigned reciprocal estimate
+FALSE,vrecped_f64,a: float64_t,float64_t,Floating-point reciprocal estimate
+FALSE,vrecpeh_f16,a: float16_t,float16_t,Floating-point reciprocal estimate
+FALSE,vrecpeq_f16,a: float16x8_t,float16x8_t,Floating-point reciprocal estimate
+TRUE,vrecpeq_f32,a: float32x4_t,float32x4_t,Floating-point reciprocal estimate
+TRUE,vrecpeq_f64,a: float64x2_t,float64x2_t,Floating-point reciprocal estimate
+FALSE,vrecpeq_u32,a: uint32x4_t,uint32x4_t,Unsigned reciprocal estimate
+FALSE,vrecpes_f32,a: f32,f32,Floating-point reciprocal estimate
+FALSE,vrecps_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point reciprocal step
+FALSE,vrecps_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point reciprocal step
+FALSE,vrecps_f64,"a: float64x1_t, b: float64x1_t",float64x1_t,Floating-point reciprocal step
+FALSE,vrecpsd_f64,"a: float64_t, b: float64_t",float64_t,Floating-point reciprocal step
+FALSE,vrecpsh_f16,"a: float16_t, b: float16_t",float16_t,Floating-point reciprocal step
+FALSE,vrecpsq_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point reciprocal step
+FALSE,vrecpsq_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point reciprocal step
+FALSE,vrecpsq_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point reciprocal step
+FALSE,vrecpss_f32,"a: f32, b: f32",f32,Floating-point reciprocal step
+FALSE,vrecpxd_f64,a: float64_t,float64_t,Floating-point reciprocal exponent
+FALSE,vrecpxh_f16,a: float16_t,float16_t,Floating-point reciprocal exponent
+FALSE,vrecpxs_f32,a: f32,f32,Floating-point reciprocal exponent
+FALSE,vreinterpret_bf16_f32,a: float32x2_t,bfloat16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_bf16_f64,a: float64x1_t,bfloat16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_bf16_p16,a: poly16x4_t,bfloat16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_bf16_p64,a: poly64x1_t,bfloat16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_bf16_p8,a: poly8x8_t,bfloat16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_bf16_s16,a: int16x4_t,bfloat16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_bf16_s32,a: int32x2_t,bfloat16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_bf16_s64,a: int64x1_t,bfloat16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_bf16_s8,a: int8x8_t,bfloat16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_bf16_u16,a: uint16x4_t,bfloat16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_bf16_u32,a: uint32x2_t,bfloat16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_bf16_u64,a: uint64x1_t,bfloat16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_bf16_u8,a: uint8x8_t,bfloat16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_f16_f32,a: float32x2_t,float16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_f16_f64,a: float64x1_t,float16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_f16_p16,a: poly16x4_t,float16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_f16_p64,a: poly64x1_t,float16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_f16_p8,a: poly8x8_t,float16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_f16_s16,a: int16x4_t,float16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_f16_s32,a: int32x2_t,float16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_f16_s64,a: int64x1_t,float16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_f16_s8,a: int8x8_t,float16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_f16_u16,a: uint16x4_t,float16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_f16_u32,a: uint32x2_t,float16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_f16_u64,a: uint64x1_t,float16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_f16_u8,a: uint8x8_t,float16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_f32_bf16,a: bfloat16x4_t,float32x2_t,Vector reinterpret cast operation
+FALSE,vreinterpret_f32_f16,a: float16x4_t,float32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f32_f64,a: float64x1_t,float32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f32_p16,a: poly16x4_t,float32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f32_p8,a: poly8x8_t,float32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f32_s16,a: int16x4_t,float32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f32_s32,a: int32x2_t,float32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f32_s64,a: int64x1_t,float32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f32_s8,a: int8x8_t,float32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f32_u16,a: uint16x4_t,float32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f32_u32,a: uint32x2_t,float32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f32_u64,a: uint64x1_t,float32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f32_u8,a: uint8x8_t,float32x2_t,Vector reinterpret cast operation
+FALSE,vreinterpret_f64_bf16,a: bfloat16x4_t,float64x1_t,Vector reinterpret cast operation
+FALSE,vreinterpret_f64_f16,a: float16x4_t,float64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f64_f32,a: float32x2_t,float64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f64_p16,a: poly16x4_t,float64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f64_p64,a: poly64x1_t,float64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f64_p8,a: poly8x8_t,float64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f64_s16,a: int16x4_t,float64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f64_s32,a: int32x2_t,float64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f64_s64,a: int64x1_t,float64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f64_s8,a: int8x8_t,float64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f64_u16,a: uint16x4_t,float64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f64_u32,a: uint32x2_t,float64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f64_u64,a: uint64x1_t,float64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_f64_u8,a: uint8x8_t,float64x1_t,Vector reinterpret cast operation
+FALSE,vreinterpret_p16_bf16,a: bfloat16x4_t,poly16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_p16_f16,a: float16x4_t,poly16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p16_f32,a: float32x2_t,poly16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p16_f64,a: float64x1_t,poly16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p16_p64,a: poly64x1_t,poly16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p16_p8,a: poly8x8_t,poly16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p16_s16,a: int16x4_t,poly16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p16_s32,a: int32x2_t,poly16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p16_s64,a: int64x1_t,poly16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p16_s8,a: int8x8_t,poly16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p16_u16,a: uint16x4_t,poly16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p16_u32,a: uint32x2_t,poly16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p16_u64,a: uint64x1_t,poly16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p16_u8,a: uint8x8_t,poly16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_p64_bf16,a: bfloat16x4_t,poly64x1_t,Vector reinterpret cast operation
+FALSE,vreinterpret_p64_f16,a: float16x4_t,poly64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p64_f32,a: float32x2_t,poly64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p64_f64,a: float64x1_t,poly64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p64_p16,a: poly16x4_t,poly64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p64_p8,a: poly8x8_t,poly64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p64_s16,a: int16x4_t,poly64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p64_s32,a: int32x2_t,poly64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p64_s8,a: int8x8_t,poly64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p64_u16,a: uint16x4_t,poly64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p64_u32,a: uint32x2_t,poly64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p64_u64,a: uint64x1_t,poly64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p64_u8,a: uint8x8_t,poly64x1_t,Vector reinterpret cast operation
+FALSE,vreinterpret_p8_bf16,a: bfloat16x4_t,poly8x8_t,Vector reinterpret cast operation
+FALSE,vreinterpret_p8_f16,a: float16x4_t,poly8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p8_f32,a: float32x2_t,poly8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p8_f64,a: float64x1_t,poly8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p8_p16,a: poly16x4_t,poly8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p8_p64,a: poly64x1_t,poly8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p8_s16,a: int16x4_t,poly8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p8_s32,a: int32x2_t,poly8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p8_s64,a: int64x1_t,poly8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p8_s8,a: int8x8_t,poly8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p8_u16,a: uint16x4_t,poly8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p8_u32,a: uint32x2_t,poly8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p8_u64,a: uint64x1_t,poly8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_p8_u8,a: uint8x8_t,poly8x8_t,Vector reinterpret cast operation
+FALSE,vreinterpret_s16_bf16,a: bfloat16x4_t,int16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_s16_f16,a: float16x4_t,int16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s16_f32,a: float32x2_t,int16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s16_f64,a: float64x1_t,int16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s16_p16,a: poly16x4_t,int16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s16_p64,a: poly64x1_t,int16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s16_p8,a: poly8x8_t,int16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s16_s32,a: int32x2_t,int16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s16_s64,a: int64x1_t,int16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s16_s8,a: int8x8_t,int16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s16_u16,a: uint16x4_t,int16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s16_u32,a: uint32x2_t,int16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s16_u64,a: uint64x1_t,int16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s16_u8,a: uint8x8_t,int16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_s32_bf16,a: bfloat16x4_t,int32x2_t,Vector reinterpret cast operation
+FALSE,vreinterpret_s32_f16,a: float16x4_t,int32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s32_f32,a: float32x2_t,int32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s32_f64,a: float64x1_t,int32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s32_p16,a: poly16x4_t,int32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s32_p64,a: poly64x1_t,int32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s32_p8,a: poly8x8_t,int32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s32_s16,a: int16x4_t,int32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s32_s64,a: int64x1_t,int32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s32_s8,a: int8x8_t,int32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s32_u16,a: uint16x4_t,int32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s32_u32,a: uint32x2_t,int32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s32_u64,a: uint64x1_t,int32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s32_u8,a: uint8x8_t,int32x2_t,Vector reinterpret cast operation
+FALSE,vreinterpret_s64_bf16,a: bfloat16x4_t,int64x1_t,Vector reinterpret cast operation
+FALSE,vreinterpret_s64_f16,a: float16x4_t,int64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s64_f32,a: float32x2_t,int64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s64_f64,a: float64x1_t,int64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s64_p16,a: poly16x4_t,int64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s64_p64,a: poly64x1_t,int64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s64_p8,a: poly8x8_t,int64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s64_s16,a: int16x4_t,int64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s64_s32,a: int32x2_t,int64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s64_s8,a: int8x8_t,int64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s64_u16,a: uint16x4_t,int64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s64_u32,a: uint32x2_t,int64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s64_u64,a: uint64x1_t,int64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s64_u8,a: uint8x8_t,int64x1_t,Vector reinterpret cast operation
+FALSE,vreinterpret_s8_bf16,a: bfloat16x4_t,int8x8_t,Vector reinterpret cast operation
+FALSE,vreinterpret_s8_f16,a: float16x4_t,int8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s8_f32,a: float32x2_t,int8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s8_f64,a: float64x1_t,int8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s8_p16,a: poly16x4_t,int8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s8_p64,a: poly64x1_t,int8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s8_p8,a: poly8x8_t,int8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s8_s16,a: int16x4_t,int8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s8_s32,a: int32x2_t,int8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s8_s64,a: int64x1_t,int8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s8_u16,a: uint16x4_t,int8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s8_u32,a: uint32x2_t,int8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s8_u64,a: uint64x1_t,int8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_s8_u8,a: uint8x8_t,int8x8_t,Vector reinterpret cast operation
+FALSE,vreinterpret_u16_bf16,a: bfloat16x4_t,uint16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_u16_f16,a: float16x4_t,uint16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u16_f32,a: float32x2_t,uint16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u16_f64,a: float64x1_t,uint16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u16_p16,a: poly16x4_t,uint16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u16_p64,a: poly64x1_t,uint16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u16_p8,a: poly8x8_t,uint16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u16_s16,a: int16x4_t,uint16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u16_s32,a: int32x2_t,uint16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u16_s64,a: int64x1_t,uint16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u16_s8,a: int8x8_t,uint16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u16_u32,a: uint32x2_t,uint16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u16_u64,a: uint64x1_t,uint16x4_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u16_u8,a: uint8x8_t,uint16x4_t,Vector reinterpret cast operation
+FALSE,vreinterpret_u32_bf16,a: bfloat16x4_t,uint32x2_t,Vector reinterpret cast operation
+FALSE,vreinterpret_u32_f16,a: float16x4_t,uint32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u32_f32,a: float32x2_t,uint32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u32_f64,a: float64x1_t,uint32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u32_p16,a: poly16x4_t,uint32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u32_p64,a: poly64x1_t,uint32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u32_p8,a: poly8x8_t,uint32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u32_s16,a: int16x4_t,uint32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u32_s32,a: int32x2_t,uint32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u32_s64,a: int64x1_t,uint32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u32_s8,a: int8x8_t,uint32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u32_u16,a: uint16x4_t,uint32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u32_u64,a: uint64x1_t,uint32x2_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u32_u8,a: uint8x8_t,uint32x2_t,Vector reinterpret cast operation
+FALSE,vreinterpret_u64_bf16,a: bfloat16x4_t,uint64x1_t,Vector reinterpret cast operation
+FALSE,vreinterpret_u64_f16,a: float16x4_t,uint64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u64_f32,a: float32x2_t,uint64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u64_f64,a: float64x1_t,uint64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u64_p16,a: poly16x4_t,uint64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u64_p64,a: poly64x1_t,uint64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u64_p8,a: poly8x8_t,uint64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u64_s16,a: int16x4_t,uint64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u64_s32,a: int32x2_t,uint64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u64_s64,a: int64x1_t,uint64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u64_s8,a: int8x8_t,uint64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u64_u16,a: uint16x4_t,uint64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u64_u32,a: uint32x2_t,uint64x1_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u64_u8,a: uint8x8_t,uint64x1_t,Vector reinterpret cast operation
+FALSE,vreinterpret_u8_bf16,a: bfloat16x4_t,uint8x8_t,Vector reinterpret cast operation
+FALSE,vreinterpret_u8_f16,a: float16x4_t,uint8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u8_f32,a: float32x2_t,uint8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u8_f64,a: float64x1_t,uint8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u8_p16,a: poly16x4_t,uint8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u8_p64,a: poly64x1_t,uint8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u8_p8,a: poly8x8_t,uint8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u8_s16,a: int16x4_t,uint8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u8_s32,a: int32x2_t,uint8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u8_s64,a: int64x1_t,uint8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u8_s8,a: int8x8_t,uint8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u8_u16,a: uint16x4_t,uint8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u8_u32,a: uint32x2_t,uint8x8_t,Vector reinterpret cast operation
+TRUE,vreinterpret_u8_u64,a: uint64x1_t,uint8x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_bf16_f32,a: float32x4_t,bfloat16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_bf16_f64,a: float64x2_t,bfloat16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_bf16_p128,a: poly128_t,bfloat16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_bf16_p16,a: poly16x8_t,bfloat16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_bf16_p64,a: poly64x2_t,bfloat16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_bf16_p8,a: poly8x16_t,bfloat16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_bf16_s16,a: int16x8_t,bfloat16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_bf16_s32,a: int32x4_t,bfloat16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_bf16_s64,a: int64x2_t,bfloat16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_bf16_s8,a: int8x16_t,bfloat16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_bf16_u16,a: uint16x8_t,bfloat16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_bf16_u32,a: uint32x4_t,bfloat16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_bf16_u64,a: uint64x2_t,bfloat16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_bf16_u8,a: uint8x16_t,bfloat16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_f16_f32,a: float32x4_t,float16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_f16_f64,a: float64x2_t,float16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_f16_p128,a: poly128_t,float16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_f16_p16,a: poly16x8_t,float16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_f16_p64,a: poly64x2_t,float16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_f16_p8,a: poly8x16_t,float16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_f16_s16,a: int16x8_t,float16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_f16_s32,a: int32x4_t,float16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_f16_s64,a: int64x2_t,float16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_f16_s8,a: int8x16_t,float16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_f16_u16,a: uint16x8_t,float16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_f16_u32,a: uint32x4_t,float16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_f16_u64,a: uint64x2_t,float16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_f16_u8,a: uint8x16_t,float16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_f32_bf16,a: bfloat16x8_t,float32x4_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_f32_f16,a: float16x8_t,float32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f32_f64,a: float64x2_t,float32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f32_p16,a: poly16x8_t,float32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f32_p8,a: poly8x16_t,float32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f32_s16,a: int16x8_t,float32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f32_s32,a: int32x4_t,float32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f32_s64,a: int64x2_t,float32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f32_s8,a: int8x16_t,float32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f32_u16,a: uint16x8_t,float32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f32_u32,a: uint32x4_t,float32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f32_u64,a: uint64x2_t,float32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f32_u8,a: uint8x16_t,float32x4_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_f64_bf16,a: bfloat16x8_t,float64x2_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_f64_f16,a: float16x8_t,float64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f64_f32,a: float32x4_t,float64x2_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_f64_p128,a: poly128_t,float64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f64_p16,a: poly16x8_t,float64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f64_p64,a: poly64x2_t,float64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f64_p8,a: poly8x16_t,float64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f64_s16,a: int16x8_t,float64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f64_s32,a: int32x4_t,float64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f64_s64,a: int64x2_t,float64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f64_s8,a: int8x16_t,float64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f64_u16,a: uint16x8_t,float64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f64_u32,a: uint32x4_t,float64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f64_u64,a: uint64x2_t,float64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_f64_u8,a: uint8x16_t,float64x2_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p128_bf16,a: bfloat16x8_t,poly128_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p128_f16,a: float16x8_t,poly128_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p128_f32,a: float32x4_t,poly128_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p128_f64,a: float64x2_t,poly128_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p128_p16,a: poly16x8_t,poly128_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p128_p8,a: poly8x16_t,poly128_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p128_s16,a: int16x8_t,poly128_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p128_s32,a: int32x4_t,poly128_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p128_s64,a: int64x2_t,poly128_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p128_s8,a: int8x16_t,poly128_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p128_u16,a: uint16x8_t,poly128_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p128_u32,a: uint32x4_t,poly128_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p128_u64,a: uint64x2_t,poly128_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p128_u8,a: uint8x16_t,poly128_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p16_bf16,a: bfloat16x8_t,poly16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p16_f16,a: float16x8_t,poly16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p16_f32,a: float32x4_t,poly16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p16_f64,a: float64x2_t,poly16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p16_p128,a: poly128_t,poly16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p16_p64,a: poly64x2_t,poly16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p16_p8,a: poly8x16_t,poly16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p16_s16,a: int16x8_t,poly16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p16_s32,a: int32x4_t,poly16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p16_s64,a: int64x2_t,poly16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p16_s8,a: int8x16_t,poly16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p16_u16,a: uint16x8_t,poly16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p16_u32,a: uint32x4_t,poly16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p16_u64,a: uint64x2_t,poly16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p16_u8,a: uint8x16_t,poly16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p64_bf16,a: bfloat16x8_t,poly64x2_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p64_f16,a: float16x8_t,poly64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p64_f32,a: float32x4_t,poly64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p64_f64,a: float64x2_t,poly64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p64_p16,a: poly16x8_t,poly64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p64_p8,a: poly8x16_t,poly64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p64_s16,a: int16x8_t,poly64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p64_s32,a: int32x4_t,poly64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p64_s64,a: int64x2_t,poly64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p64_s8,a: int8x16_t,poly64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p64_u16,a: uint16x8_t,poly64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p64_u32,a: uint32x4_t,poly64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p64_u64,a: uint64x2_t,poly64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p64_u8,a: uint8x16_t,poly64x2_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p8_bf16,a: bfloat16x8_t,poly8x16_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p8_f16,a: float16x8_t,poly8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p8_f32,a: float32x4_t,poly8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p8_f64,a: float64x2_t,poly8x16_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_p8_p128,a: poly128_t,poly8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p8_p16,a: poly16x8_t,poly8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p8_p64,a: poly64x2_t,poly8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p8_s16,a: int16x8_t,poly8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p8_s32,a: int32x4_t,poly8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p8_s64,a: int64x2_t,poly8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p8_s8,a: int8x16_t,poly8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p8_u16,a: uint16x8_t,poly8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p8_u32,a: uint32x4_t,poly8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p8_u64,a: uint64x2_t,poly8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_p8_u8,a: uint8x16_t,poly8x16_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_s16_bf16,a: bfloat16x8_t,int16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_s16_f16,a: float16x8_t,int16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s16_f32,a: float32x4_t,int16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s16_f64,a: float64x2_t,int16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_s16_p128,a: poly128_t,int16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s16_p16,a: poly16x8_t,int16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s16_p64,a: poly64x2_t,int16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s16_p8,a: poly8x16_t,int16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s16_s32,a: int32x4_t,int16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s16_s64,a: int64x2_t,int16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s16_s8,a: int8x16_t,int16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s16_u16,a: uint16x8_t,int16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s16_u32,a: uint32x4_t,int16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s16_u64,a: uint64x2_t,int16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s16_u8,a: uint8x16_t,int16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_s32_bf16,a: bfloat16x8_t,int32x4_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_s32_f16,a: float16x8_t,int32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s32_f32,a: float32x4_t,int32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s32_f64,a: float64x2_t,int32x4_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_s32_p128,a: poly128_t,int32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s32_p16,a: poly16x8_t,int32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s32_p64,a: poly64x2_t,int32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s32_p8,a: poly8x16_t,int32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s32_s16,a: int16x8_t,int32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s32_s64,a: int64x2_t,int32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s32_s8,a: int8x16_t,int32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s32_u16,a: uint16x8_t,int32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s32_u32,a: uint32x4_t,int32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s32_u64,a: uint64x2_t,int32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s32_u8,a: uint8x16_t,int32x4_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_s64_bf16,a: bfloat16x8_t,int64x2_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_s64_f16,a: float16x8_t,int64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s64_f32,a: float32x4_t,int64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s64_f64,a: float64x2_t,int64x2_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_s64_p128,a: poly128_t,int64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s64_p16,a: poly16x8_t,int64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s64_p64,a: poly64x2_t,int64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s64_p8,a: poly8x16_t,int64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s64_s16,a: int16x8_t,int64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s64_s32,a: int32x4_t,int64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s64_s8,a: int8x16_t,int64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s64_u16,a: uint16x8_t,int64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s64_u32,a: uint32x4_t,int64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s64_u64,a: uint64x2_t,int64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s64_u8,a: uint8x16_t,int64x2_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_s8_bf16,a: bfloat16x8_t,int8x16_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_s8_f16,a: float16x8_t,int8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s8_f32,a: float32x4_t,int8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s8_f64,a: float64x2_t,int8x16_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_s8_p128,a: poly128_t,int8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s8_p16,a: poly16x8_t,int8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s8_p64,a: poly64x2_t,int8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s8_p8,a: poly8x16_t,int8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s8_s16,a: int16x8_t,int8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s8_s32,a: int32x4_t,int8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s8_s64,a: int64x2_t,int8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s8_u16,a: uint16x8_t,int8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s8_u32,a: uint32x4_t,int8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s8_u64,a: uint64x2_t,int8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_s8_u8,a: uint8x16_t,int8x16_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_u16_bf16,a: bfloat16x8_t,uint16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_u16_f16,a: float16x8_t,uint16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u16_f32,a: float32x4_t,uint16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u16_f64,a: float64x2_t,uint16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_u16_p128,a: poly128_t,uint16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u16_p16,a: poly16x8_t,uint16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u16_p64,a: poly64x2_t,uint16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u16_p8,a: poly8x16_t,uint16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u16_s16,a: int16x8_t,uint16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u16_s32,a: int32x4_t,uint16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u16_s64,a: int64x2_t,uint16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u16_s8,a: int8x16_t,uint16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u16_u32,a: uint32x4_t,uint16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u16_u64,a: uint64x2_t,uint16x8_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u16_u8,a: uint8x16_t,uint16x8_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_u32_bf16,a: bfloat16x8_t,uint32x4_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_u32_f16,a: float16x8_t,uint32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u32_f32,a: float32x4_t,uint32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u32_f64,a: float64x2_t,uint32x4_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_u32_p128,a: poly128_t,uint32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u32_p16,a: poly16x8_t,uint32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u32_p64,a: poly64x2_t,uint32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u32_p8,a: poly8x16_t,uint32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u32_s16,a: int16x8_t,uint32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u32_s32,a: int32x4_t,uint32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u32_s64,a: int64x2_t,uint32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u32_s8,a: int8x16_t,uint32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u32_u16,a: uint16x8_t,uint32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u32_u64,a: uint64x2_t,uint32x4_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u32_u8,a: uint8x16_t,uint32x4_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_u64_bf16,a: bfloat16x8_t,uint64x2_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_u64_f16,a: float16x8_t,uint64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u64_f32,a: float32x4_t,uint64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u64_f64,a: float64x2_t,uint64x2_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_u64_p128,a: poly128_t,uint64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u64_p16,a: poly16x8_t,uint64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u64_p64,a: poly64x2_t,uint64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u64_p8,a: poly8x16_t,uint64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u64_s16,a: int16x8_t,uint64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u64_s32,a: int32x4_t,uint64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u64_s64,a: int64x2_t,uint64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u64_s8,a: int8x16_t,uint64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u64_u16,a: uint16x8_t,uint64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u64_u32,a: uint32x4_t,uint64x2_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u64_u8,a: uint8x16_t,uint64x2_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_u8_bf16,a: bfloat16x8_t,uint8x16_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_u8_f16,a: float16x8_t,uint8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u8_f32,a: float32x4_t,uint8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u8_f64,a: float64x2_t,uint8x16_t,Vector reinterpret cast operation
+FALSE,vreinterpretq_u8_p128,a: poly128_t,uint8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u8_p16,a: poly16x8_t,uint8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u8_p64,a: poly64x2_t,uint8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u8_p8,a: poly8x16_t,uint8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u8_s16,a: int16x8_t,uint8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u8_s32,a: int32x4_t,uint8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u8_s64,a: int64x2_t,uint8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u8_s8,a: int8x16_t,uint8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u8_u16,a: uint16x8_t,uint8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u8_u32,a: uint32x4_t,uint8x16_t,Vector reinterpret cast operation
+TRUE,vreinterpretq_u8_u64,a: uint64x2_t,uint8x16_t,Vector reinterpret cast operation
+TRUE,vrev16_p8,vec: poly8x8_t,poly8x8_t,Reverse elements in 16-bit halfwords
+TRUE,vrev16_s8,vec: int8x8_t,int8x8_t,Reverse elements in 16-bit halfwords
+TRUE,vrev16_u8,vec: uint8x8_t,uint8x8_t,Reverse elements in 16-bit halfwords
+TRUE,vrev16q_p8,vec: poly8x16_t,poly8x16_t,Reverse elements in 16-bit halfwords
+TRUE,vrev16q_s8,vec: int8x16_t,int8x16_t,Reverse elements in 16-bit halfwords
+TRUE,vrev16q_u8,vec: uint8x16_t,uint8x16_t,Reverse elements in 16-bit halfwords
+TRUE,vrev32_p16,vec: poly16x4_t,poly16x4_t,Reverse elements in 32-bit words
+TRUE,vrev32_p8,vec: poly8x8_t,poly8x8_t,Reverse elements in 32-bit words
+TRUE,vrev32_s16,vec: int16x4_t,int16x4_t,Reverse elements in 32-bit words
+TRUE,vrev32_s8,vec: int8x8_t,int8x8_t,Reverse elements in 32-bit words
+TRUE,vrev32_u16,vec: uint16x4_t,uint16x4_t,Reverse elements in 32-bit words
+TRUE,vrev32_u8,vec: uint8x8_t,uint8x8_t,Reverse elements in 32-bit words
+TRUE,vrev32q_p16,vec: poly16x8_t,poly16x8_t,Reverse elements in 32-bit words
+TRUE,vrev32q_p8,vec: poly8x16_t,poly8x16_t,Reverse elements in 32-bit words
+TRUE,vrev32q_s16,vec: int16x8_t,int16x8_t,Reverse elements in 32-bit words
+TRUE,vrev32q_s8,vec: int8x16_t,int8x16_t,Reverse elements in 32-bit words
+TRUE,vrev32q_u16,vec: uint16x8_t,uint16x8_t,Reverse elements in 32-bit words
+TRUE,vrev32q_u8,vec: uint8x16_t,uint8x16_t,Reverse elements in 32-bit words
+FALSE,vrev64_f16,vec: float16x4_t,float16x4_t,Reverse elements in 64-bit doublewords
+TRUE,vrev64_f32,vec: float32x2_t,float32x2_t,Reverse elements in 64-bit doublewords
+TRUE,vrev64_p16,vec: poly16x4_t,poly16x4_t,Reverse elements in 64-bit doublewords
+TRUE,vrev64_p8,vec: poly8x8_t,poly8x8_t,Reverse elements in 64-bit doublewords
+TRUE,vrev64_s16,vec: int16x4_t,int16x4_t,Reverse elements in 64-bit doublewords
+TRUE,vrev64_s32,vec: int32x2_t,int32x2_t,Reverse elements in 64-bit doublewords
+TRUE,vrev64_s8,vec: int8x8_t,int8x8_t,Reverse elements in 64-bit doublewords
+TRUE,vrev64_u16,vec: uint16x4_t,uint16x4_t,Reverse elements in 64-bit doublewords
+TRUE,vrev64_u32,vec: uint32x2_t,uint32x2_t,Reverse elements in 64-bit doublewords
+TRUE,vrev64_u8,vec: uint8x8_t,uint8x8_t,Reverse elements in 64-bit doublewords
+FALSE,vrev64q_f16,vec: float16x8_t,float16x8_t,Reverse elements in 64-bit doublewords
+TRUE,vrev64q_f32,vec: float32x4_t,float32x4_t,Reverse elements in 64-bit doublewords
+TRUE,vrev64q_p16,vec: poly16x8_t,poly16x8_t,Reverse elements in 64-bit doublewords
+TRUE,vrev64q_p8,vec: poly8x16_t,poly8x16_t,Reverse elements in 64-bit doublewords
+TRUE,vrev64q_s16,vec: int16x8_t,int16x8_t,Reverse elements in 64-bit doublewords
+TRUE,vrev64q_s32,vec: int32x4_t,int32x4_t,Reverse elements in 64-bit doublewords
+TRUE,vrev64q_s8,vec: int8x16_t,int8x16_t,Reverse elements in 64-bit doublewords
+TRUE,vrev64q_u16,vec: uint16x8_t,uint16x8_t,Reverse elements in 64-bit doublewords
+TRUE,vrev64q_u32,vec: uint32x4_t,uint32x4_t,Reverse elements in 64-bit doublewords
+TRUE,vrev64q_u8,vec: uint8x16_t,uint8x16_t,Reverse elements in 64-bit doublewords
+TRUE,vrhadd_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Signed rounding halving add
+TRUE,vrhadd_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Signed rounding halving add
+TRUE,vrhadd_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Signed rounding halving add
+TRUE,vrhadd_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Unsigned rounding halving add
+TRUE,vrhadd_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Unsigned rounding halving add
+TRUE,vrhadd_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Unsigned rounding halving add
+TRUE,vrhaddq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Signed rounding halving add
+TRUE,vrhaddq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Signed rounding halving add
+TRUE,vrhaddq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Signed rounding halving add
+TRUE,vrhaddq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Unsigned rounding halving add
+TRUE,vrhaddq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Unsigned rounding halving add
+TRUE,vrhaddq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Unsigned rounding halving add
+FALSE,vrnd_f16,a: float16x4_t,float16x4_t,"Floating-point round to integral, toward zero"
+TRUE,vrnd_f32,a: float32x2_t,float32x2_t,"Floating-point round to integral, toward zero"
+TRUE,vrnd_f64,a: float64x1_t,float64x1_t,"Floating-point round to integral, toward zero"
+FALSE,vrnd32x_f32,a: float32x2_t,float32x2_t,"Floating-point round to 32-bit integer, using current rounding mode"
+FALSE,vrnd32x_f64,a: float64x1_t,float64x1_t,"Floating-point round to 32-bit integer, using current rounding mode"
+FALSE,vrnd32xq_f32,a: float32x4_t,float32x4_t,"Floating-point round to 32-bit integer, using current rounding mode"
+FALSE,vrnd32xq_f64,a: float64x2_t,float64x2_t,"Floating-point round to 32-bit integer, using current rounding mode"
+FALSE,vrnd32z_f32,a: float32x2_t,float32x2_t,Floating-point round to 32-bit integer toward zero
+FALSE,vrnd32z_f64,a: float64x1_t,float64x1_t,Floating-point round to 32-bit integer toward zero
+FALSE,vrnd32zq_f32,a: float32x4_t,float32x4_t,Floating-point round to 32-bit integer toward zero
+FALSE,vrnd32zq_f64,a: float64x2_t,float64x2_t,Floating-point round to 32-bit integer toward zero
+FALSE,vrnd64x_f32,a: float32x2_t,float32x2_t,"Floating-point round to 64-bit integer, using current rounding mode"
+FALSE,vrnd64x_f64,a: float64x1_t,float64x1_t,"Floating-point round to 64-bit integer, using current rounding mode"
+FALSE,vrnd64xq_f32,a: float32x4_t,float32x4_t,"Floating-point round to 64-bit integer, using current rounding mode"
+FALSE,vrnd64xq_f64,a: float64x2_t,float64x2_t,"Floating-point round to 64-bit integer, using current rounding mode"
+FALSE,vrnd64z_f32,a: float32x2_t,float32x2_t,Floating-point round to 64-bit integer toward zero
+FALSE,vrnd64z_f64,a: float64x1_t,float64x1_t,Floating-point round to 64-bit integer toward zero
+FALSE,vrnd64zq_f32,a: float32x4_t,float32x4_t,Floating-point round to 64-bit integer toward zero
+FALSE,vrnd64zq_f64,a: float64x2_t,float64x2_t,Floating-point round to 64-bit integer toward zero
+FALSE,vrnda_f16,a: float16x4_t,float16x4_t,"Floating-point round to integral, to nearest with ties to away"
+TRUE,vrnda_f32,a: float32x2_t,float32x2_t,"Floating-point round to integral, to nearest with ties to away"
+TRUE,vrnda_f64,a: float64x1_t,float64x1_t,"Floating-point round to integral, to nearest with ties to away"
+FALSE,vrndah_f16,a: float16_t,float16_t,"Floating-point round to integral, to nearest with ties to away"
+FALSE,vrndaq_f16,a: float16x8_t,float16x8_t,"Floating-point round to integral, to nearest with ties to away"
+TRUE,vrndaq_f32,a: float32x4_t,float32x4_t,"Floating-point round to integral, to nearest with ties to away"
+TRUE,vrndaq_f64,a: float64x2_t,float64x2_t,"Floating-point round to integral, to nearest with ties to away"
+FALSE,vrndh_f16,a: float16_t,float16_t,"Floating-point round to integral, toward zero"
+FALSE,vrndi_f16,a: float16x4_t,float16x4_t,"Floating-point round to integral, using current rounding mode"
+TRUE,vrndi_f32,a: float32x2_t,float32x2_t,"Floating-point round to integral, using current rounding mode"
+TRUE,vrndi_f64,a: float64x1_t,float64x1_t,"Floating-point round to integral, using current rounding mode"
+FALSE,vrndih_f16,a: float16_t,float16_t,"Floating-point round to integral, using current rounding mode"
+FALSE,vrndiq_f16,a: float16x8_t,float16x8_t,"Floating-point round to integral, using current rounding mode"
+TRUE,vrndiq_f32,a: float32x4_t,float32x4_t,"Floating-point round to integral, using current rounding mode"
+TRUE,vrndiq_f64,a: float64x2_t,float64x2_t,"Floating-point round to integral, using current rounding mode"
+FALSE,vrndm_f16,a: float16x4_t,float16x4_t,"Floating-point round to integral, toward minus infinity"
+TRUE,vrndm_f32,a: float32x2_t,float32x2_t,"Floating-point round to integral, toward minus infinity"
+TRUE,vrndm_f64,a: float64x1_t,float64x1_t,"Floating-point round to integral, toward minus infinity"
+FALSE,vrndmh_f16,a: float16_t,float16_t,"Floating-point round to integral, toward minus infinity"
+FALSE,vrndmq_f16,a: float16x8_t,float16x8_t,"Floating-point round to integral, toward minus infinity"
+TRUE,vrndmq_f32,a: float32x4_t,float32x4_t,"Floating-point round to integral, toward minus infinity"
+TRUE,vrndmq_f64,a: float64x2_t,float64x2_t,"Floating-point round to integral, toward minus infinity"
+FALSE,vrndn_f16,a: float16x4_t,float16x4_t,"Floating-point round to integral, to nearest with ties to even"
+TRUE,vrndn_f32,a: float32x2_t,float32x2_t,"Floating-point round to integral, to nearest with ties to even"
+TRUE,vrndn_f64,a: float64x1_t,float64x1_t,"Floating-point round to integral, to nearest with ties to even"
+FALSE,vrndnh_f16,a: float16_t,float16_t,"Floating-point round to integral, to nearest with ties to even"
+FALSE,vrndnq_f16,a: float16x8_t,float16x8_t,"Floating-point round to integral, to nearest with ties to even"
+TRUE,vrndnq_f32,a: float32x4_t,float32x4_t,"Floating-point round to integral, to nearest with ties to even"
+TRUE,vrndnq_f64,a: float64x2_t,float64x2_t,"Floating-point round to integral, to nearest with ties to even"
+FALSE,vrndns_f32,a: f32,f32,"Floating-point round to integral, to nearest with ties to even"
+FALSE,vrndp_f16,a: float16x4_t,float16x4_t,"Floating-point round to integral, toward plus infinity"
+TRUE,vrndp_f32,a: float32x2_t,float32x2_t,"Floating-point round to integral, toward plus infinity"
+TRUE,vrndp_f64,a: float64x1_t,float64x1_t,"Floating-point round to integral, toward plus infinity"
+FALSE,vrndph_f16,a: float16_t,float16_t,"Floating-point round to integral, toward plus infinity"
+FALSE,vrndpq_f16,a: float16x8_t,float16x8_t,"Floating-point round to integral, toward plus infinity"
+TRUE,vrndpq_f32,a: float32x4_t,float32x4_t,"Floating-point round to integral, toward plus infinity"
+TRUE,vrndpq_f64,a: float64x2_t,float64x2_t,"Floating-point round to integral, toward plus infinity"
+FALSE,vrndq_f16,a: float16x8_t,float16x8_t,"Floating-point round to integral, toward zero"
+TRUE,vrndq_f32,a: float32x4_t,float32x4_t,"Floating-point round to integral, toward zero"
+TRUE,vrndq_f64,a: float64x2_t,float64x2_t,"Floating-point round to integral, toward zero"
+FALSE,vrndx_f16,a: float16x4_t,float16x4_t,"Floating-point round to integral exact, using current rounding mode"
+TRUE,vrndx_f32,a: float32x2_t,float32x2_t,"Floating-point round to integral exact, using current rounding mode"
+TRUE,vrndx_f64,a: float64x1_t,float64x1_t,"Floating-point round to integral exact, using current rounding mode"
+FALSE,vrndxh_f16,a: float16_t,float16_t,"Floating-point round to integral exact, using current rounding mode"
+FALSE,vrndxq_f16,a: float16x8_t,float16x8_t,"Floating-point round to integral exact, using current rounding mode"
+TRUE,vrndxq_f32,a: float32x4_t,float32x4_t,"Floating-point round to integral exact, using current rounding mode"
+TRUE,vrndxq_f64,a: float64x2_t,float64x2_t,"Floating-point round to integral exact, using current rounding mode"
+TRUE,vrshl_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Signed rounding shift left
+TRUE,vrshl_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Signed rounding shift left
+TRUE,vrshl_s64,"a: int64x1_t, b: int64x1_t",int64x1_t,Signed rounding shift left
+TRUE,vrshl_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Signed rounding shift left
+TRUE,vrshl_u16,"a: uint16x4_t, b: int16x4_t",uint16x4_t,Unsigned rounding shift left
+TRUE,vrshl_u32,"a: uint32x2_t, b: int32x2_t",uint32x2_t,Unsigned rounding shift left
+TRUE,vrshl_u64,"a: uint64x1_t, b: int64x1_t",uint64x1_t,Unsigned rounding shift left
+TRUE,vrshl_u8,"a: uint8x8_t, b: int8x8_t",uint8x8_t,Unsigned rounding shift left
+TRUE,vrshld_s64,"a: i64, b: i64",i64,Signed rounding shift left
+TRUE,vrshld_u64,"a: u64, b: i64",u64,Unsigned rounding shift left
+TRUE,vrshlq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Signed rounding shift left
+TRUE,vrshlq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Signed rounding shift left
+TRUE,vrshlq_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,Signed rounding shift left
+TRUE,vrshlq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Signed rounding shift left
+TRUE,vrshlq_u16,"a: uint16x8_t, b: int16x8_t",uint16x8_t,Unsigned rounding shift left
+TRUE,vrshlq_u32,"a: uint32x4_t, b: int32x4_t",uint32x4_t,Unsigned rounding shift left
+TRUE,vrshlq_u64,"a: uint64x2_t, b: int64x2_t",uint64x2_t,Unsigned rounding shift left
+TRUE,vrshlq_u8,"a: uint8x16_t, b: int8x16_t",uint8x16_t,Unsigned rounding shift left
+TRUE,vrshr_n_s16,"a: int16x4_t, n: const int",int16x4_t,Signed rounding shift right
+TRUE,vrshr_n_s32,"a: int32x2_t, n: const int",int32x2_t,Signed rounding shift right
+TRUE,vrshr_n_s64,"a: int64x1_t, n: const int",int64x1_t,Signed rounding shift right
+TRUE,vrshr_n_s8,"a: int8x8_t, n: const int",int8x8_t,Signed rounding shift right
+TRUE,vrshr_n_u16,"a: uint16x4_t, n: const int",uint16x4_t,Unsigned rounding shift right
+TRUE,vrshr_n_u32,"a: uint32x2_t, n: const int",uint32x2_t,Unsigned rounding shift right
+TRUE,vrshr_n_u64,"a: uint64x1_t, n: const int",uint64x1_t,Unsigned rounding shift right
+TRUE,vrshr_n_u8,"a: uint8x8_t, n: const int",uint8x8_t,Unsigned rounding shift right
+TRUE,vrshrd_n_s64,"a: i64, n: const int",i64,Signed rounding shift right
+TRUE,vrshrd_n_u64,"a: u64, n: const int",u64,Unsigned rounding shift right
+TRUE,vrshrn_high_n_s16,"r: int8x8_t, a: int16x8_t, n: const int",int8x16_t,Rounding shift right narrow
+TRUE,vrshrn_high_n_s32,"r: int16x4_t, a: int32x4_t, n: const int",int16x8_t,Rounding shift right narrow
+TRUE,vrshrn_high_n_s64,"r: int32x2_t, a: int64x2_t, n: const int",int32x4_t,Rounding shift right narrow
+TRUE,vrshrn_high_n_u16,"r: uint8x8_t, a: uint16x8_t, n: const int",uint8x16_t,Rounding shift right narrow
+TRUE,vrshrn_high_n_u32,"r: uint16x4_t, a: uint32x4_t, n: const int",uint16x8_t,Rounding shift right narrow
+TRUE,vrshrn_high_n_u64,"r: uint32x2_t, a: uint64x2_t, n: const int",uint32x4_t,Rounding shift right narrow
+TRUE,vrshrn_n_s16,"a: int16x8_t, n: const int",int8x8_t,Rounding shift right narrow
+TRUE,vrshrn_n_s32,"a: int32x4_t, n: const int",int16x4_t,Rounding shift right narrow
+TRUE,vrshrn_n_s64,"a: int64x2_t, n: const int",int32x2_t,Rounding shift right narrow
+TRUE,vrshrn_n_u16,"a: uint16x8_t, n: const int",uint8x8_t,Rounding shift right narrow
+TRUE,vrshrn_n_u32,"a: uint32x4_t, n: const int",uint16x4_t,Rounding shift right narrow
+TRUE,vrshrn_n_u64,"a: uint64x2_t, n: const int",uint32x2_t,Rounding shift right narrow
+TRUE,vrshrq_n_s16,"a: int16x8_t, n: const int",int16x8_t,Signed rounding shift right
+TRUE,vrshrq_n_s32,"a: int32x4_t, n: const int",int32x4_t,Signed rounding shift right
+TRUE,vrshrq_n_s64,"a: int64x2_t, n: const int",int64x2_t,Signed rounding shift right
+TRUE,vrshrq_n_s8,"a: int8x16_t, n: const int",int8x16_t,Signed rounding shift right
+TRUE,vrshrq_n_u16,"a: uint16x8_t, n: const int",uint16x8_t,Unsigned rounding shift right
+TRUE,vrshrq_n_u32,"a: uint32x4_t, n: const int",uint32x4_t,Unsigned rounding shift right
+TRUE,vrshrq_n_u64,"a: uint64x2_t, n: const int",uint64x2_t,Unsigned rounding shift right
+TRUE,vrshrq_n_u8,"a: uint8x16_t, n: const int",uint8x16_t,Unsigned rounding shift right
+FALSE,vrsqrte_f16,a: float16x4_t,float16x4_t,Floating-point reciprocal square root estimate
+TRUE,vrsqrte_f32,a: float32x2_t,float32x2_t,Floating-point reciprocal square root estimate
+FALSE,vrsqrte_f64,a: float64x1_t,float64x1_t,Floating-point reciprocal square root estimate
+FALSE,vrsqrte_u32,a: uint32x2_t,uint32x2_t,Unsigned reciprocal square root estimate
+FALSE,vrsqrted_f64,a: float64_t,float64_t,Floating-point reciprocal square root estimate
+FALSE,vrsqrteh_f16,a: float16_t,float16_t,Floating-point reciprocal square root estimate
+FALSE,vrsqrteq_f16,a: float16x8_t,float16x8_t,Floating-point reciprocal square root estimate
+FALSE,vrsqrteq_f32,a: float32x4_t,float32x4_t,Floating-point reciprocal square root estimate
+FALSE,vrsqrteq_f64,a: float64x2_t,float64x2_t,Floating-point reciprocal square root estimate
+FALSE,vrsqrteq_u32,a: uint32x4_t,uint32x4_t,Unsigned reciprocal square root estimate
+FALSE,vrsqrtes_f32,a: f32,f32,Floating-point reciprocal square root estimate
+FALSE,vrsqrts_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point reciprocal square root step
+FALSE,vrsqrts_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point reciprocal square root step
+FALSE,vrsqrts_f64,"a: float64x1_t, b: float64x1_t",float64x1_t,Floating-point reciprocal square root step
+FALSE,vrsqrtsd_f64,"a: float64_t, b: float64_t",float64_t,Floating-point reciprocal square root step
+FALSE,vrsqrtsh_f16,"a: float16_t, b: float16_t",float16_t,Floating-point reciprocal square root step
+FALSE,vrsqrtsq_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point reciprocal square root step
+FALSE,vrsqrtsq_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point reciprocal square root step
+FALSE,vrsqrtsq_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point reciprocal square root step
+FALSE,vrsqrtss_f32,"a: f32, b: f32",f32,Floating-point reciprocal square root step
+TRUE,vrsra_n_s16,"a: int16x4_t, b: int16x4_t, n: const int",int16x4_t,Signed rounding shift right and accumulate
+TRUE,vrsra_n_s32,"a: int32x2_t, b: int32x2_t, n: const int",int32x2_t,Signed rounding shift right and accumulate
+TRUE,vrsra_n_s64,"a: int64x1_t, b: int64x1_t, n: const int",int64x1_t,Signed rounding shift right and accumulate
+TRUE,vrsra_n_s8,"a: int8x8_t, b: int8x8_t, n: const int",int8x8_t,Signed rounding shift right and accumulate
+TRUE,vrsra_n_u16,"a: uint16x4_t, b: uint16x4_t, n: const int",uint16x4_t,Unsigned rounding shift right and accumulate
+TRUE,vrsra_n_u32,"a: uint32x2_t, b: uint32x2_t, n: const int",uint32x2_t,Unsigned rounding shift right and accumulate
+TRUE,vrsra_n_u64,"a: uint64x1_t, b: uint64x1_t, n: const int",uint64x1_t,Unsigned rounding shift right and accumulate
+TRUE,vrsra_n_u8,"a: uint8x8_t, b: uint8x8_t, n: const int",uint8x8_t,Unsigned rounding shift right and accumulate
+TRUE,vrsrad_n_s64,"a: i64, b: i64, n: const int",i64,Signed rounding shift right and accumulate
+TRUE,vrsrad_n_u64,"a: u64, b: u64, n: const int",u64,Unsigned rounding shift right and accumulate
+TRUE,vrsraq_n_s16,"a: int16x8_t, b: int16x8_t, n: const int",int16x8_t,Signed rounding shift right and accumulate
+TRUE,vrsraq_n_s32,"a: int32x4_t, b: int32x4_t, n: const int",int32x4_t,Signed rounding shift right and accumulate
+TRUE,vrsraq_n_s64,"a: int64x2_t, b: int64x2_t, n: const int",int64x2_t,Signed rounding shift right and accumulate
+TRUE,vrsraq_n_s8,"a: int8x16_t, b: int8x16_t, n: const int",int8x16_t,Signed rounding shift right and accumulate
+TRUE,vrsraq_n_u16,"a: uint16x8_t, b: uint16x8_t, n: const int",uint16x8_t,Unsigned rounding shift right and accumulate
+TRUE,vrsraq_n_u32,"a: uint32x4_t, b: uint32x4_t, n: const int",uint32x4_t,Unsigned rounding shift right and accumulate
+TRUE,vrsraq_n_u64,"a: uint64x2_t, b: uint64x2_t, n: const int",uint64x2_t,Unsigned rounding shift right and accumulate
+TRUE,vrsraq_n_u8,"a: uint8x16_t, b: uint8x16_t, n: const int",uint8x16_t,Unsigned rounding shift right and accumulate
+FALSE,vrsubhn_high_s16,"r: int8x8_t, a: int16x8_t, b: int16x8_t",int8x16_t,Rounding subtract returning high narrow
+FALSE,vrsubhn_high_s32,"r: int16x4_t, a: int32x4_t, b: int32x4_t",int16x8_t,Rounding subtract returning high narrow
+FALSE,vrsubhn_high_s64,"r: int32x2_t, a: int64x2_t, b: int64x2_t",int32x4_t,Rounding subtract returning high narrow
+FALSE,vrsubhn_high_u16,"r: uint8x8_t, a: uint16x8_t, b: uint16x8_t",uint8x16_t,Rounding subtract returning high narrow
+FALSE,vrsubhn_high_u32,"r: uint16x4_t, a: uint32x4_t, b: uint32x4_t",uint16x8_t,Rounding subtract returning high narrow
+FALSE,vrsubhn_high_u64,"r: uint32x2_t, a: uint64x2_t, b: uint64x2_t",uint32x4_t,Rounding subtract returning high narrow
+FALSE,vrsubhn_s16,"a: int16x8_t, b: int16x8_t",int8x8_t,Rounding subtract returning high narrow
+FALSE,vrsubhn_s32,"a: int32x4_t, b: int32x4_t",int16x4_t,Rounding subtract returning high narrow
+FALSE,vrsubhn_s64,"a: int64x2_t, b: int64x2_t",int32x2_t,Rounding subtract returning high narrow
+FALSE,vrsubhn_u16,"a: uint16x8_t, b: uint16x8_t",uint8x8_t,Rounding subtract returning high narrow
+FALSE,vrsubhn_u32,"a: uint32x4_t, b: uint32x4_t",uint16x4_t,Rounding subtract returning high narrow
+FALSE,vrsubhn_u64,"a: uint64x2_t, b: uint64x2_t",uint32x2_t,Rounding subtract returning high narrow
+FALSE,vset_lane_bf16,"a: bfloat16_t, v: bfloat16x4_t, lane: const int",bfloat16x4_t,Insert vector element from another vector element
+FALSE,vset_lane_f16,"a: float16_t, v: float16x4_t, lane: const int",float16x4_t,Insert vector element from another vector element
+FALSE,vset_lane_f32,"a: f32, v: float32x2_t, lane: const int",float32x2_t,Insert vector element from another vector element
+FALSE,vset_lane_f64,"a: float64_t, v: float64x1_t, lane: const int",float64x1_t,Insert vector element from another vector element
+FALSE,vset_lane_p16,"a: poly16_t, v: poly16x4_t, lane: const int",poly16x4_t,Insert vector element from another vector element
+FALSE,vset_lane_p64,"a: poly64_t, v: poly64x1_t, lane: const int",poly64x1_t,Insert vector element from another vector element
+FALSE,vset_lane_p8,"a: poly8_t, v: poly8x8_t, lane: const int",poly8x8_t,Insert vector element from another vector element
+FALSE,vset_lane_s16,"a: i16, v: int16x4_t, lane: const int",int16x4_t,Insert vector element from another vector element
+FALSE,vset_lane_s32,"a: i32, v: int32x2_t, lane: const int",int32x2_t,Insert vector element from another vector element
+FALSE,vset_lane_s64,"a: i64, v: int64x1_t, lane: const int",int64x1_t,Insert vector element from another vector element
+FALSE,vset_lane_s8,"a: i8, v: int8x8_t, lane: const int",int8x8_t,Insert vector element from another vector element
+FALSE,vset_lane_u16,"a: u16, v: uint16x4_t, lane: const int",uint16x4_t,Insert vector element from another vector element
+FALSE,vset_lane_u32,"a: u32, v: uint32x2_t, lane: const int",uint32x2_t,Insert vector element from another vector element
+FALSE,vset_lane_u64,"a: u64, v: uint64x1_t, lane: const int",uint64x1_t,Insert vector element from another vector element
+FALSE,vset_lane_u8,"a: u8, v: uint8x8_t, lane: const int",uint8x8_t,Insert vector element from another vector element
+FALSE,vsetq_lane_bf16,"a: bfloat16_t, v: bfloat16x8_t, lane: const int",bfloat16x8_t,Insert vector element from another vector element
+FALSE,vsetq_lane_f16,"a: float16_t, v: float16x8_t, lane: const int",float16x8_t,Insert vector element from another vector element
+FALSE,vsetq_lane_f32,"a: f32, v: float32x4_t, lane: const int",float32x4_t,Insert vector element from another vector element
+FALSE,vsetq_lane_f64,"a: float64_t, v: float64x2_t, lane: const int",float64x2_t,Insert vector element from another vector element
+FALSE,vsetq_lane_p16,"a: poly16_t, v: poly16x8_t, lane: const int",poly16x8_t,Insert vector element from another vector element
+FALSE,vsetq_lane_p64,"a: poly64_t, v: poly64x2_t, lane: const int",poly64x2_t,Insert vector element from another vector element
+FALSE,vsetq_lane_p8,"a: poly8_t, v: poly8x16_t, lane: const int",poly8x16_t,Insert vector element from another vector element
+FALSE,vsetq_lane_s16,"a: i16, v: int16x8_t, lane: const int",int16x8_t,Insert vector element from another vector element
+FALSE,vsetq_lane_s32,"a: i32, v: int32x4_t, lane: const int",int32x4_t,Insert vector element from another vector element
+FALSE,vsetq_lane_s64,"a: i64, v: int64x2_t, lane: const int",int64x2_t,Insert vector element from another vector element
+FALSE,vsetq_lane_s8,"a: i8, v: int8x16_t, lane: const int",int8x16_t,Insert vector element from another vector element
+FALSE,vsetq_lane_u16,"a: u16, v: uint16x8_t, lane: const int",uint16x8_t,Insert vector element from another vector element
+FALSE,vsetq_lane_u32,"a: u32, v: uint32x4_t, lane: const int",uint32x4_t,Insert vector element from another vector element
+FALSE,vsetq_lane_u64,"a: u64, v: uint64x2_t, lane: const int",uint64x2_t,Insert vector element from another vector element
+FALSE,vsetq_lane_u8,"a: u8, v: uint8x16_t, lane: const int",uint8x16_t,Insert vector element from another vector element
+TRUE,vsha1cq_u32,"hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t",uint32x4_t,SHA1 hash update (choose)
+TRUE,vsha1h_u32,hash_e: u32,u32,SHA1 fixed rotate
+TRUE,vsha1mq_u32,"hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t",uint32x4_t,SHA1 hash update (majority)
+TRUE,vsha1pq_u32,"hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t",uint32x4_t,SHA1 hash update (parity)
+TRUE,vsha1su0q_u32,"w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t",uint32x4_t,SHA1 schedule update 0
+TRUE,vsha1su1q_u32,"tw0_3: uint32x4_t, w12_15: uint32x4_t",uint32x4_t,SHA1 schedule update 1
+TRUE,vsha256h2q_u32,"hash_efgh: uint32x4_t, hash_abcd: uint32x4_t, wk: uint32x4_t",uint32x4_t,SHA256 hash update (part 2)
+TRUE,vsha256hq_u32,"hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t",uint32x4_t,SHA256 hash update (part 1)
+TRUE,vsha256su0q_u32,"w0_3: uint32x4_t, w4_7: uint32x4_t",uint32x4_t,SHA256 schedule update 0
+TRUE,vsha256su1q_u32,"tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t",uint32x4_t,SHA256 schedule update 1
+FALSE,vsha512h2q_u64,"sum_ab: uint64x2_t, hash_c_: uint64x2_t, hash_ab: uint64x2_t",uint64x2_t,SHA512 hash update part 2
+FALSE,vsha512hq_u64,"hash_ed: uint64x2_t, hash_gf: uint64x2_t, kwh_kwh2: uint64x2_t",uint64x2_t,SHA512 hash update part 1
+FALSE,vsha512su0q_u64,"w0_1: uint64x2_t, w2_: uint64x2_t",uint64x2_t,SHA512 schedule update 0
+FALSE,vsha512su1q_u64,"s01_s02: uint64x2_t, w14_15: uint64x2_t, w9_10: uint64x2_t",uint64x2_t,SHA512 schedule update 1
+TRUE,vshl_n_s16,"a: int16x4_t, n: const int",int16x4_t,Shift left
+TRUE,vshl_n_s32,"a: int32x2_t, n: const int",int32x2_t,Shift left
+TRUE,vshl_n_s64,"a: int64x1_t, n: const int",int64x1_t,Shift left
+TRUE,vshl_n_s8,"a: int8x8_t, n: const int",int8x8_t,Shift left
+TRUE,vshl_n_u16,"a: uint16x4_t, n: const int",uint16x4_t,Shift left
+TRUE,vshl_n_u32,"a: uint32x2_t, n: const int",uint32x2_t,Shift left
+TRUE,vshl_n_u64,"a: uint64x1_t, n: const int",uint64x1_t,Shift left
+TRUE,vshl_n_u8,"a: uint8x8_t, n: const int",uint8x8_t,Shift left
+TRUE,vshl_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Signed shift left
+TRUE,vshl_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Signed shift left
+TRUE,vshl_s64,"a: int64x1_t, b: int64x1_t",int64x1_t,Signed shift left
+TRUE,vshl_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Signed shift left
+TRUE,vshl_u16,"a: uint16x4_t, b: int16x4_t",uint16x4_t,Unsigned shift left
+TRUE,vshl_u32,"a: uint32x2_t, b: int32x2_t",uint32x2_t,Unsigned shift left
+TRUE,vshl_u64,"a: uint64x1_t, b: int64x1_t",uint64x1_t,Unsigned shift left
+TRUE,vshl_u8,"a: uint8x8_t, b: int8x8_t",uint8x8_t,Unsigned shift left
+TRUE,vshld_n_s64,"a: i64, n: const int",i64,Shift left
+TRUE,vshld_n_u64,"a: u64, n: const int",u64,Shift left
+TRUE,vshld_s64,"a: i64, b: i64",i64,Signed shift left
+TRUE,vshld_u64,"a: u64, b: i64",u64,Unsigned shift left
+TRUE,vshll_high_n_s16,"a: int16x8_t, n: const int",int32x4_t,Shift left long
+TRUE,vshll_high_n_s16,"a: int16x8_t, n: const int",int32x4_t,Signed shift left long
+TRUE,vshll_high_n_s32,"a: int32x4_t, n: const int",int64x2_t,Shift left long
+TRUE,vshll_high_n_s32,"a: int32x4_t, n: const int",int64x2_t,Signed shift left long
+TRUE,vshll_high_n_s8,"a: int8x16_t, n: const int",int16x8_t,Shift left long
+TRUE,vshll_high_n_s8,"a: int8x16_t, n: const int",int16x8_t,Signed shift left long
+TRUE,vshll_high_n_u16,"a: uint16x8_t, n: const int",uint32x4_t,Shift left long
+TRUE,vshll_high_n_u16,"a: uint16x8_t, n: const int",uint32x4_t,Unsigned shift left long
+TRUE,vshll_high_n_u32,"a: uint32x4_t, n: const int",uint64x2_t,Shift left long
+TRUE,vshll_high_n_u32,"a: uint32x4_t, n: const int",uint64x2_t,Unsigned shift left long
+TRUE,vshll_high_n_u8,"a: uint8x16_t, n: const int",uint16x8_t,Shift left long
+TRUE,vshll_high_n_u8,"a: uint8x16_t, n: const int",uint16x8_t,Unsigned shift left long
+TRUE,vshll_n_s16,"a: int16x4_t, n: const int",int32x4_t,Shift left long
+TRUE,vshll_n_s16,"a: int16x4_t, n: const int",int32x4_t,Signed shift left long
+TRUE,vshll_n_s32,"a: int32x2_t, n: const int",int64x2_t,Shift left long
+TRUE,vshll_n_s32,"a: int32x2_t, n: const int",int64x2_t,Signed shift left long
+TRUE,vshll_n_s8,"a: int8x8_t, n: const int",int16x8_t,Shift left long
+TRUE,vshll_n_s8,"a: int8x8_t, n: const int",int16x8_t,Signed shift left long
+TRUE,vshll_n_u16,"a: uint16x4_t, n: const int",uint32x4_t,Shift left long
+TRUE,vshll_n_u16,"a: uint16x4_t, n: const int",uint32x4_t,Unsigned shift left long
+TRUE,vshll_n_u32,"a: uint32x2_t, n: const int",uint64x2_t,Shift left long
+TRUE,vshll_n_u32,"a: uint32x2_t, n: const int",uint64x2_t,Unsigned shift left long
+TRUE,vshll_n_u8,"a: uint8x8_t, n: const int",uint16x8_t,Shift left long
+TRUE,vshll_n_u8,"a: uint8x8_t, n: const int",uint16x8_t,Unsigned shift left long
+TRUE,vshlq_n_s16,"a: int16x8_t, n: const int",int16x8_t,Shift left
+TRUE,vshlq_n_s32,"a: int32x4_t, n: const int",int32x4_t,Shift left
+TRUE,vshlq_n_s64,"a: int64x2_t, n: const int",int64x2_t,Shift left
+TRUE,vshlq_n_s8,"a: int8x16_t, n: const int",int8x16_t,Shift left
+TRUE,vshlq_n_u16,"a: uint16x8_t, n: const int",uint16x8_t,Shift left
+TRUE,vshlq_n_u32,"a: uint32x4_t, n: const int",uint32x4_t,Shift left
+TRUE,vshlq_n_u64,"a: uint64x2_t, n: const int",uint64x2_t,Shift left
+TRUE,vshlq_n_u8,"a: uint8x16_t, n: const int",uint8x16_t,Shift left
+TRUE,vshlq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Signed shift left
+TRUE,vshlq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Signed shift left
+TRUE,vshlq_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,Signed shift left
+TRUE,vshlq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Signed shift left
+TRUE,vshlq_u16,"a: uint16x8_t, b: int16x8_t",uint16x8_t,Unsigned shift left
+TRUE,vshlq_u32,"a: uint32x4_t, b: int32x4_t",uint32x4_t,Unsigned shift left
+TRUE,vshlq_u64,"a: uint64x2_t, b: int64x2_t",uint64x2_t,Unsigned shift left
+TRUE,vshlq_u8,"a: uint8x16_t, b: int8x16_t",uint8x16_t,Unsigned shift left
+TRUE,vshr_n_s16,"a: int16x4_t, n: const int",int16x4_t,Signed shift right
+TRUE,vshr_n_s32,"a: int32x2_t, n: const int",int32x2_t,Signed shift right
+TRUE,vshr_n_s64,"a: int64x1_t, n: const int",int64x1_t,Signed shift right
+TRUE,vshr_n_s8,"a: int8x8_t, n: const int",int8x8_t,Signed shift right
+TRUE,vshr_n_u16,"a: uint16x4_t, n: const int",uint16x4_t,Unsigned shift right
+TRUE,vshr_n_u32,"a: uint32x2_t, n: const int",uint32x2_t,Unsigned shift right
+TRUE,vshr_n_u64,"a: uint64x1_t, n: const int",uint64x1_t,Unsigned shift right
+TRUE,vshr_n_u8,"a: uint8x8_t, n: const int",uint8x8_t,Unsigned shift right
+TRUE,vshrd_n_s64,"a: i64, n: const int",i64,Signed shift right
+TRUE,vshrd_n_u64,"a: u64, n: const int",u64,Unsigned shift right
+TRUE,vshrn_high_n_s16,"r: int8x8_t, a: int16x8_t, n: const int",int8x16_t,Shift right narrow
+TRUE,vshrn_high_n_s32,"r: int16x4_t, a: int32x4_t, n: const int",int16x8_t,Shift right narrow
+TRUE,vshrn_high_n_s64,"r: int32x2_t, a: int64x2_t, n: const int",int32x4_t,Shift right narrow
+TRUE,vshrn_high_n_u16,"r: uint8x8_t, a: uint16x8_t, n: const int",uint8x16_t,Shift right narrow
+TRUE,vshrn_high_n_u32,"r: uint16x4_t, a: uint32x4_t, n: const int",uint16x8_t,Shift right narrow
+TRUE,vshrn_high_n_u64,"r: uint32x2_t, a: uint64x2_t, n: const int",uint32x4_t,Shift right narrow
+TRUE,vshrn_n_s16,"a: int16x8_t, n: const int",int8x8_t,Shift right narrow
+TRUE,vshrn_n_s32,"a: int32x4_t, n: const int",int16x4_t,Shift right narrow
+TRUE,vshrn_n_s64,"a: int64x2_t, n: const int",int32x2_t,Shift right narrow
+TRUE,vshrn_n_u16,"a: uint16x8_t, n: const int",uint8x8_t,Shift right narrow
+TRUE,vshrn_n_u32,"a: uint32x4_t, n: const int",uint16x4_t,Shift right narrow
+TRUE,vshrn_n_u64,"a: uint64x2_t, n: const int",uint32x2_t,Shift right narrow
+TRUE,vshrq_n_s16,"a: int16x8_t, n: const int",int16x8_t,Signed shift right
+TRUE,vshrq_n_s32,"a: int32x4_t, n: const int",int32x4_t,Signed shift right
+TRUE,vshrq_n_s64,"a: int64x2_t, n: const int",int64x2_t,Signed shift right
+TRUE,vshrq_n_s8,"a: int8x16_t, n: const int",int8x16_t,Signed shift right
+TRUE,vshrq_n_u16,"a: uint16x8_t, n: const int",uint16x8_t,Unsigned shift right
+TRUE,vshrq_n_u32,"a: uint32x4_t, n: const int",uint32x4_t,Unsigned shift right
+TRUE,vshrq_n_u64,"a: uint64x2_t, n: const int",uint64x2_t,Unsigned shift right
+TRUE,vshrq_n_u8,"a: uint8x16_t, n: const int",uint8x16_t,Unsigned shift right
+TRUE,vsli_n_p16,"a: poly16x4_t, b: poly16x4_t, n: const int",poly16x4_t,Shift left and insert
+FALSE,vsli_n_p64,"a: poly64x1_t, b: poly64x1_t, n: const int",poly64x1_t,Shift left and insert
+TRUE,vsli_n_p8,"a: poly8x8_t, b: poly8x8_t, n: const int",poly8x8_t,Shift left and insert
+TRUE,vsli_n_s16,"a: int16x4_t, b: int16x4_t, n: const int",int16x4_t,Shift left and insert
+TRUE,vsli_n_s32,"a: int32x2_t, b: int32x2_t, n: const int",int32x2_t,Shift left and insert
+TRUE,vsli_n_s64,"a: int64x1_t, b: int64x1_t, n: const int",int64x1_t,Shift left and insert
+TRUE,vsli_n_s8,"a: int8x8_t, b: int8x8_t, n: const int",int8x8_t,Shift left and insert
+TRUE,vsli_n_u16,"a: uint16x4_t, b: uint16x4_t, n: const int",uint16x4_t,Shift left and insert
+TRUE,vsli_n_u32,"a: uint32x2_t, b: uint32x2_t, n: const int",uint32x2_t,Shift left and insert
+TRUE,vsli_n_u64,"a: uint64x1_t, b: uint64x1_t, n: const int",uint64x1_t,Shift left and insert
+TRUE,vsli_n_u8,"a: uint8x8_t, b: uint8x8_t, n: const int",uint8x8_t,Shift left and insert
+TRUE,vslid_n_s64,"a: i64, b: i64, n: const int",i64,Shift left and insert
+TRUE,vslid_n_u64,"a: u64, b: u64, n: const int",u64,Shift left and insert
+TRUE,vsliq_n_p16,"a: poly16x8_t, b: poly16x8_t, n: const int",poly16x8_t,Shift left and insert
+FALSE,vsliq_n_p64,"a: poly64x2_t, b: poly64x2_t, n: const int",poly64x2_t,Shift left and insert
+TRUE,vsliq_n_p8,"a: poly8x16_t, b: poly8x16_t, n: const int",poly8x16_t,Shift left and insert
+TRUE,vsliq_n_s16,"a: int16x8_t, b: int16x8_t, n: const int",int16x8_t,Shift left and insert
+TRUE,vsliq_n_s32,"a: int32x4_t, b: int32x4_t, n: const int",int32x4_t,Shift left and insert
+TRUE,vsliq_n_s64,"a: int64x2_t, b: int64x2_t, n: const int",int64x2_t,Shift left and insert
+TRUE,vsliq_n_s8,"a: int8x16_t, b: int8x16_t, n: const int",int8x16_t,Shift left and insert
+TRUE,vsliq_n_u16,"a: uint16x8_t, b: uint16x8_t, n: const int",uint16x8_t,Shift left and insert
+TRUE,vsliq_n_u32,"a: uint32x4_t, b: uint32x4_t, n: const int",uint32x4_t,Shift left and insert
+TRUE,vsliq_n_u64,"a: uint64x2_t, b: uint64x2_t, n: const int",uint64x2_t,Shift left and insert
+TRUE,vsliq_n_u8,"a: uint8x16_t, b: uint8x16_t, n: const int",uint8x16_t,Shift left and insert
+FALSE,vsm3partw1q_u32,"a: uint32x4_t, b: uint32x4_t, c: uint32x4_t",uint32x4_t,SM3PARTW1
+FALSE,vsm3partw2q_u32,"a: uint32x4_t, b: uint32x4_t, c: uint32x4_t",uint32x4_t,SM3PARTW2
+FALSE,vsm3ss1q_u32,"a: uint32x4_t, b: uint32x4_t, c: uint32x4_t",uint32x4_t,SM3SS1
+FALSE,vsm3tt1aq_u32,"a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, imm2: const int",uint32x4_t,SM3TT1A
+FALSE,vsm3tt1bq_u32,"a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, imm2: const int",uint32x4_t,SM3TT1B
+FALSE,vsm3tt2aq_u32,"a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, imm2: const int",uint32x4_t,SM3TT2A
+FALSE,vsm3tt2bq_u32,"a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, imm2: const int",uint32x4_t,SM3TT2B
+FALSE,vsm4ekeyq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,SM4 key
+FALSE,vsm4eq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,SM4 encode
+FALSE,vsqadd_u16,"a: uint16x4_t, b: int16x4_t",uint16x4_t,Unsigned saturating accumulate of signed value
+FALSE,vsqadd_u32,"a: uint32x2_t, b: int32x2_t",uint32x2_t,Unsigned saturating accumulate of signed value
+FALSE,vsqadd_u64,"a: uint64x1_t, b: int64x1_t",uint64x1_t,Unsigned saturating accumulate of signed value
+FALSE,vsqadd_u8,"a: uint8x8_t, b: int8x8_t",uint8x8_t,Unsigned saturating accumulate of signed value
+FALSE,vsqaddb_u8,"a: u8, b: i8",u8,Unsigned saturating accumulate of signed value
+FALSE,vsqaddd_u64,"a: u64, b: i64",u64,Unsigned saturating accumulate of signed value
+FALSE,vsqaddh_u16,"a: u16, b: i16",u16,Unsigned saturating accumulate of signed value
+FALSE,vsqaddq_u16,"a: uint16x8_t, b: int16x8_t",uint16x8_t,Unsigned saturating accumulate of signed value
+FALSE,vsqaddq_u32,"a: uint32x4_t, b: int32x4_t",uint32x4_t,Unsigned saturating accumulate of signed value
+FALSE,vsqaddq_u64,"a: uint64x2_t, b: int64x2_t",uint64x2_t,Unsigned saturating accumulate of signed value
+FALSE,vsqaddq_u8,"a: uint8x16_t, b: int8x16_t",uint8x16_t,Unsigned saturating accumulate of signed value
+FALSE,vsqadds_u32,"a: u32, b: i32",u32,Unsigned saturating accumulate of signed value
+FALSE,vsqrt_f16,a: float16x4_t,float16x4_t,Floating-point square root
+TRUE,vsqrt_f32,a: float32x2_t,float32x2_t,Floating-point square root
+TRUE,vsqrt_f64,a: float64x1_t,float64x1_t,Floating-point square root
+FALSE,vsqrth_f16,a: float16_t,float16_t,Floating-point square root
+FALSE,vsqrtq_f16,a: float16x8_t,float16x8_t,Floating-point square root
+TRUE,vsqrtq_f32,a: float32x4_t,float32x4_t,Floating-point square root
+TRUE,vsqrtq_f64,a: float64x2_t,float64x2_t,Floating-point square root
+TRUE,vsra_n_s16,"a: int16x4_t, b: int16x4_t, n: const int",int16x4_t,Signed shift right and accumulate
+TRUE,vsra_n_s32,"a: int32x2_t, b: int32x2_t, n: const int",int32x2_t,Signed shift right and accumulate
+TRUE,vsra_n_s64,"a: int64x1_t, b: int64x1_t, n: const int",int64x1_t,Signed shift right and accumulate
+TRUE,vsra_n_s8,"a: int8x8_t, b: int8x8_t, n: const int",int8x8_t,Signed shift right and accumulate
+TRUE,vsra_n_u16,"a: uint16x4_t, b: uint16x4_t, n: const int",uint16x4_t,Unsigned shift right and accumulate
+TRUE,vsra_n_u32,"a: uint32x2_t, b: uint32x2_t, n: const int",uint32x2_t,Unsigned shift right and accumulate
+TRUE,vsra_n_u64,"a: uint64x1_t, b: uint64x1_t, n: const int",uint64x1_t,Unsigned shift right and accumulate
+TRUE,vsra_n_u8,"a: uint8x8_t, b: uint8x8_t, n: const int",uint8x8_t,Unsigned shift right and accumulate
+TRUE,vsrad_n_s64,"a: i64, b: i64, n: const int",i64,Signed shift right and accumulate
+TRUE,vsrad_n_u64,"a: u64, b: u64, n: const int",u64,Unsigned shift right and accumulate
+TRUE,vsraq_n_s16,"a: int16x8_t, b: int16x8_t, n: const int",int16x8_t,Signed shift right and accumulate
+TRUE,vsraq_n_s32,"a: int32x4_t, b: int32x4_t, n: const int",int32x4_t,Signed shift right and accumulate
+TRUE,vsraq_n_s64,"a: int64x2_t, b: int64x2_t, n: const int",int64x2_t,Signed shift right and accumulate
+TRUE,vsraq_n_s8,"a: int8x16_t, b: int8x16_t, n: const int",int8x16_t,Signed shift right and accumulate
+TRUE,vsraq_n_u16,"a: uint16x8_t, b: uint16x8_t, n: const int",uint16x8_t,Unsigned shift right and accumulate
+TRUE,vsraq_n_u32,"a: uint32x4_t, b: uint32x4_t, n: const int",uint32x4_t,Unsigned shift right and accumulate
+TRUE,vsraq_n_u64,"a: uint64x2_t, b: uint64x2_t, n: const int",uint64x2_t,Unsigned shift right and accumulate
+TRUE,vsraq_n_u8,"a: uint8x16_t, b: uint8x16_t, n: const int",uint8x16_t,Unsigned shift right and accumulate
+TRUE,vsri_n_p16,"a: poly16x4_t, b: poly16x4_t, n: const int",poly16x4_t,Shift right and insert
+FALSE,vsri_n_p64,"a: poly64x1_t, b: poly64x1_t, n: const int",poly64x1_t,Shift right and insert
+TRUE,vsri_n_p8,"a: poly8x8_t, b: poly8x8_t, n: const int",poly8x8_t,Shift right and insert
+TRUE,vsri_n_s16,"a: int16x4_t, b: int16x4_t, n: const int",int16x4_t,Shift right and insert
+TRUE,vsri_n_s32,"a: int32x2_t, b: int32x2_t, n: const int",int32x2_t,Shift right and insert
+TRUE,vsri_n_s64,"a: int64x1_t, b: int64x1_t, n: const int",int64x1_t,Shift right and insert
+TRUE,vsri_n_s8,"a: int8x8_t, b: int8x8_t, n: const int",int8x8_t,Shift right and insert
+TRUE,vsri_n_u16,"a: uint16x4_t, b: uint16x4_t, n: const int",uint16x4_t,Shift right and insert
+TRUE,vsri_n_u32,"a: uint32x2_t, b: uint32x2_t, n: const int",uint32x2_t,Shift right and insert
+TRUE,vsri_n_u64,"a: uint64x1_t, b: uint64x1_t, n: const int",uint64x1_t,Shift right and insert
+TRUE,vsri_n_u8,"a: uint8x8_t, b: uint8x8_t, n: const int",uint8x8_t,Shift right and insert
+TRUE,vsrid_n_s64,"a: i64, b: i64, n: const int",i64,Shift right and insert
+TRUE,vsrid_n_u64,"a: u64, b: u64, n: const int",u64,Shift right and insert
+TRUE,vsriq_n_p16,"a: poly16x8_t, b: poly16x8_t, n: const int",poly16x8_t,Shift right and insert
+FALSE,vsriq_n_p64,"a: poly64x2_t, b: poly64x2_t, n: const int",poly64x2_t,Shift right and insert
+TRUE,vsriq_n_p8,"a: poly8x16_t, b: poly8x16_t, n: const int",poly8x16_t,Shift right and insert
+TRUE,vsriq_n_s16,"a: int16x8_t, b: int16x8_t, n: const int",int16x8_t,Shift right and insert
+TRUE,vsriq_n_s32,"a: int32x4_t, b: int32x4_t, n: const int",int32x4_t,Shift right and insert
+TRUE,vsriq_n_s64,"a: int64x2_t, b: int64x2_t, n: const int",int64x2_t,Shift right and insert
+TRUE,vsriq_n_s8,"a: int8x16_t, b: int8x16_t, n: const int",int8x16_t,Shift right and insert
+TRUE,vsriq_n_u16,"a: uint16x8_t, b: uint16x8_t, n: const int",uint16x8_t,Shift right and insert
+TRUE,vsriq_n_u32,"a: uint32x4_t, b: uint32x4_t, n: const int",uint32x4_t,Shift right and insert
+TRUE,vsriq_n_u64,"a: uint64x2_t, b: uint64x2_t, n: const int",uint64x2_t,Shift right and insert
+TRUE,vsriq_n_u8,"a: uint8x16_t, b: uint8x16_t, n: const int",uint8x16_t,Shift right and insert
+FALSE,vst1_bf16,"ptr: *mut bfloat16_t, val: bfloat16x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_bf16_x2,"ptr: *mut bfloat16_t, val: bfloat16x4x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_bf16_x3,"ptr: *mut bfloat16_t, val: bfloat16x4x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_bf16_x4,"ptr: *mut bfloat16_t, val: bfloat16x4x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_f16,"ptr: *mut float16_t, val: float16x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_f16_x2,"ptr: *mut float16_t, val: float16x4x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_f16_x3,"ptr: *mut float16_t, val: float16x4x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_f16_x4,"ptr: *mut float16_t, val: float16x4x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_f32,"ptr: *mut f32, val: float32x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_f32_x2,"ptr: *mut f32, val: float32x2x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_f32_x3,"ptr: *mut f32, val: float32x2x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_f32_x4,"ptr: *mut f32, val: float32x2x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_f64,"ptr: *mut float64_t, val: float64x1_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_f64_x2,"ptr: *mut float64_t, val: float64x1x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_f64_x3,"ptr: *mut float64_t, val: float64x1x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_f64_x4,"ptr: *mut float64_t, val: float64x1x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_lane_bf16,"ptr: *mut bfloat16_t, val: bfloat16x4_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_lane_f16,"ptr: *mut float16_t, val: float16x4_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_lane_f32,"ptr: *mut f32, val: float32x2_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_lane_f64,"ptr: *mut float64_t, val: float64x1_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_lane_p16,"ptr: *mut poly16_t, val: poly16x4_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_lane_p64,"ptr: *mut poly64_t, val: poly64x1_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_lane_p8,"ptr: *mut poly8_t, val: poly8x8_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_lane_s16,"ptr: *mut i16, val: int16x4_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_lane_s32,"ptr: *mut i32, val: int32x2_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_lane_s64,"ptr: *mut i64, val: int64x1_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_lane_s8,"ptr: *mut i8, val: int8x8_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_lane_u16,"ptr: *mut u16, val: uint16x4_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_lane_u32,"ptr: *mut u32, val: uint32x2_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_lane_u64,"ptr: *mut u64, val: uint64x1_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_lane_u8,"ptr: *mut u8, val: uint8x8_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_p16,"ptr: *mut poly16_t, val: poly16x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_p16_x2,"ptr: *mut poly16_t, val: poly16x4x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_p16_x3,"ptr: *mut poly16_t, val: poly16x4x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_p16_x4,"ptr: *mut poly16_t, val: poly16x4x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_p64,"ptr: *mut poly64_t, val: poly64x1_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_p64_x2,"ptr: *mut poly64_t, val: poly64x1x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_p64_x3,"ptr: *mut poly64_t, val: poly64x1x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_p64_x4,"ptr: *mut poly64_t, val: poly64x1x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_p8,"ptr: *mut poly8_t, val: poly8x8_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_p8_x2,"ptr: *mut poly8_t, val: poly8x8x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_p8_x3,"ptr: *mut poly8_t, val: poly8x8x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_p8_x4,"ptr: *mut poly8_t, val: poly8x8x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_s16,"ptr: *mut i16, val: int16x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_s16_x2,"ptr: *mut i16, val: int16x4x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_s16_x3,"ptr: *mut i16, val: int16x4x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_s16_x4,"ptr: *mut i16, val: int16x4x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_s32,"ptr: *mut i32, val: int32x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_s32_x2,"ptr: *mut i32, val: int32x2x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_s32_x3,"ptr: *mut i32, val: int32x2x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_s32_x4,"ptr: *mut i32, val: int32x2x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_s64,"ptr: *mut i64, val: int64x1_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_s64_x2,"ptr: *mut i64, val: int64x1x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_s64_x3,"ptr: *mut i64, val: int64x1x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_s64_x4,"ptr: *mut i64, val: int64x1x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_s8,"ptr: *mut i8, val: int8x8_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_s8_x2,"ptr: *mut i8, val: int8x8x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_s8_x3,"ptr: *mut i8, val: int8x8x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_s8_x4,"ptr: *mut i8, val: int8x8x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_u16,"ptr: *mut u16, val: uint16x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_u16_x2,"ptr: *mut u16, val: uint16x4x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_u16_x3,"ptr: *mut u16, val: uint16x4x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_u16_x4,"ptr: *mut u16, val: uint16x4x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_u32,"ptr: *mut u32, val: uint32x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_u32_x2,"ptr: *mut u32, val: uint32x2x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_u32_x3,"ptr: *mut u32, val: uint32x2x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_u32_x4,"ptr: *mut u32, val: uint32x2x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_u64,"ptr: *mut u64, val: uint64x1_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_u64_x2,"ptr: *mut u64, val: uint64x1x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_u64_x3,"ptr: *mut u64, val: uint64x1x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_u64_x4,"ptr: *mut u64, val: uint64x1x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_u8,"ptr: *mut u8, val: uint8x8_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_u8_x2,"ptr: *mut u8, val: uint8x8x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_u8_x3,"ptr: *mut u8, val: uint8x8x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1_u8_x4,"ptr: *mut u8, val: uint8x8x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_bf16,"ptr: *mut bfloat16_t, val: bfloat16x8_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_bf16_x2,"ptr: *mut bfloat16_t, val: bfloat16x8x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_bf16_x3,"ptr: *mut bfloat16_t, val: bfloat16x8x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_bf16_x4,"ptr: *mut bfloat16_t, val: bfloat16x8x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_f16,"ptr: *mut float16_t, val: float16x8_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_f16_x2,"ptr: *mut float16_t, val: float16x8x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_f16_x3,"ptr: *mut float16_t, val: float16x8x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_f16_x4,"ptr: *mut float16_t, val: float16x8x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_f32,"ptr: *mut f32, val: float32x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_f32_x2,"ptr: *mut f32, val: float32x4x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_f32_x3,"ptr: *mut f32, val: float32x4x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_f32_x4,"ptr: *mut f32, val: float32x4x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_f64,"ptr: *mut float64_t, val: float64x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_f64_x2,"ptr: *mut float64_t, val: float64x2x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_f64_x3,"ptr: *mut float64_t, val: float64x2x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_f64_x4,"ptr: *mut float64_t, val: float64x2x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_lane_bf16,"ptr: *mut bfloat16_t, val: bfloat16x8_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_lane_f16,"ptr: *mut float16_t, val: float16x8_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_lane_f32,"ptr: *mut f32, val: float32x4_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_lane_f64,"ptr: *mut float64_t, val: float64x2_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_lane_p16,"ptr: *mut poly16_t, val: poly16x8_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_lane_p64,"ptr: *mut poly64_t, val: poly64x2_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_lane_p8,"ptr: *mut poly8_t, val: poly8x16_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_lane_s16,"ptr: *mut i16, val: int16x8_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_lane_s32,"ptr: *mut i32, val: int32x4_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_lane_s64,"ptr: *mut i64, val: int64x2_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_lane_s8,"ptr: *mut i8, val: int8x16_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_lane_u16,"ptr: *mut u16, val: uint16x8_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_lane_u32,"ptr: *mut u32, val: uint32x4_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_lane_u64,"ptr: *mut u64, val: uint64x2_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_lane_u8,"ptr: *mut u8, val: uint8x16_t, lane: const int",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_p16,"ptr: *mut poly16_t, val: poly16x8_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_p16_x2,"ptr: *mut poly16_t, val: poly16x8x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_p16_x3,"ptr: *mut poly16_t, val: poly16x8x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_p16_x4,"ptr: *mut poly16_t, val: poly16x8x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_p64,"ptr: *mut poly64_t, val: poly64x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_p64_x2,"ptr: *mut poly64_t, val: poly64x2x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_p64_x3,"ptr: *mut poly64_t, val: poly64x2x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_p64_x4,"ptr: *mut poly64_t, val: poly64x2x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_p8,"ptr: *mut poly8_t, val: poly8x16_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_p8_x2,"ptr: *mut poly8_t, val: poly8x16x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_p8_x3,"ptr: *mut poly8_t, val: poly8x16x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_p8_x4,"ptr: *mut poly8_t, val: poly8x16x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_s16,"ptr: *mut i16, val: int16x8_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_s16_x2,"ptr: *mut i16, val: int16x8x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_s16_x3,"ptr: *mut i16, val: int16x8x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_s16_x4,"ptr: *mut i16, val: int16x8x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_s32,"ptr: *mut i32, val: int32x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_s32_x2,"ptr: *mut i32, val: int32x4x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_s32_x3,"ptr: *mut i32, val: int32x4x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_s32_x4,"ptr: *mut i32, val: int32x4x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_s64,"ptr: *mut i64, val: int64x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_s64_x2,"ptr: *mut i64, val: int64x2x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_s64_x3,"ptr: *mut i64, val: int64x2x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_s64_x4,"ptr: *mut i64, val: int64x2x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_s8,"ptr: *mut i8, val: int8x16_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_s8_x2,"ptr: *mut i8, val: int8x16x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_s8_x3,"ptr: *mut i8, val: int8x16x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_s8_x4,"ptr: *mut i8, val: int8x16x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_u16,"ptr: *mut u16, val: uint16x8_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_u16_x2,"ptr: *mut u16, val: uint16x8x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_u16_x3,"ptr: *mut u16, val: uint16x8x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_u16_x4,"ptr: *mut u16, val: uint16x8x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_u32,"ptr: *mut u32, val: uint32x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_u32_x2,"ptr: *mut u32, val: uint32x4x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_u32_x3,"ptr: *mut u32, val: uint32x4x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_u32_x4,"ptr: *mut u32, val: uint32x4x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_u64,"ptr: *mut u64, val: uint64x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_u64_x2,"ptr: *mut u64, val: uint64x2x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_u64_x3,"ptr: *mut u64, val: uint64x2x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_u64_x4,"ptr: *mut u64, val: uint64x2x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_u8,"ptr: *mut u8, val: uint8x16_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_u8_x2,"ptr: *mut u8, val: uint8x16x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_u8_x3,"ptr: *mut u8, val: uint8x16x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst1q_u8_x4,"ptr: *mut u8, val: uint8x16x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst2_bf16,"ptr: *mut bfloat16_t, val: bfloat16x4x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2_f16,"ptr: *mut float16_t, val: float16x4x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2_f32,"ptr: *mut f32, val: float32x2x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2_f64,"ptr: *mut float64_t, val: float64x1x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst2_lane_bf16,"ptr: *mut bfloat16_t, val: bfloat16x4x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2_lane_f16,"ptr: *mut float16_t, val: float16x4x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2_lane_f32,"ptr: *mut f32, val: float32x2x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2_lane_f64,"ptr: *mut float64_t, val: float64x1x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2_lane_p16,"ptr: *mut poly16_t, val: poly16x4x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2_lane_p64,"ptr: *mut poly64_t, val: poly64x1x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2_lane_p8,"ptr: *mut poly8_t, val: poly8x8x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2_lane_s16,"ptr: *mut i16, val: int16x4x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2_lane_s32,"ptr: *mut i32, val: int32x2x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2_lane_s64,"ptr: *mut i64, val: int64x1x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2_lane_s8,"ptr: *mut i8, val: int8x8x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2_lane_u16,"ptr: *mut u16, val: uint16x4x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2_lane_u32,"ptr: *mut u32, val: uint32x2x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2_lane_u64,"ptr: *mut u64, val: uint64x1x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2_lane_u8,"ptr: *mut u8, val: uint8x8x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2_p16,"ptr: *mut poly16_t, val: poly16x4x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2_p64,"ptr: *mut poly64_t, val: poly64x1x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst2_p8,"ptr: *mut poly8_t, val: poly8x8x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2_s16,"ptr: *mut i16, val: int16x4x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2_s32,"ptr: *mut i32, val: int32x2x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2_s64,"ptr: *mut i64, val: int64x1x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst2_s8,"ptr: *mut i8, val: int8x8x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2_u16,"ptr: *mut u16, val: uint16x4x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2_u32,"ptr: *mut u32, val: uint32x2x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2_u64,"ptr: *mut u64, val: uint64x1x2_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst2_u8,"ptr: *mut u8, val: uint8x8x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_bf16,"ptr: *mut bfloat16_t, val: bfloat16x8x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_f16,"ptr: *mut float16_t, val: float16x8x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_f32,"ptr: *mut f32, val: float32x4x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_f64,"ptr: *mut float64_t, val: float64x2x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_lane_bf16,"ptr: *mut bfloat16_t, val: bfloat16x8x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_lane_f16,"ptr: *mut float16_t, val: float16x8x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_lane_f32,"ptr: *mut f32, val: float32x4x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_lane_f64,"ptr: *mut float64_t, val: float64x2x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_lane_p16,"ptr: *mut poly16_t, val: poly16x8x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_lane_p64,"ptr: *mut poly64_t, val: poly64x2x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_lane_p8,"ptr: *mut poly8_t, val: poly8x16x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_lane_s16,"ptr: *mut i16, val: int16x8x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_lane_s32,"ptr: *mut i32, val: int32x4x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_lane_s64,"ptr: *mut i64, val: int64x2x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_lane_s8,"ptr: *mut i8, val: int8x16x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_lane_u16,"ptr: *mut u16, val: uint16x8x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_lane_u32,"ptr: *mut u32, val: uint32x4x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_lane_u64,"ptr: *mut u64, val: uint64x2x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_lane_u8,"ptr: *mut u8, val: uint8x16x2_t, lane: const int",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_p16,"ptr: *mut poly16_t, val: poly16x8x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_p64,"ptr: *mut poly64_t, val: poly64x2x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_p8,"ptr: *mut poly8_t, val: poly8x16x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_s16,"ptr: *mut i16, val: int16x8x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_s32,"ptr: *mut i32, val: int32x4x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_s64,"ptr: *mut i64, val: int64x2x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_s8,"ptr: *mut i8, val: int8x16x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_u16,"ptr: *mut u16, val: uint16x8x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_u32,"ptr: *mut u32, val: uint32x4x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_u64,"ptr: *mut u64, val: uint64x2x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst2q_u8,"ptr: *mut u8, val: uint8x16x2_t",void,Store multiple 2-element structures from two registers
+FALSE,vst3_bf16,"ptr: *mut bfloat16_t, val: bfloat16x4x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3_f16,"ptr: *mut float16_t, val: float16x4x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3_f32,"ptr: *mut f32, val: float32x2x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3_f64,"ptr: *mut float64_t, val: float64x1x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst3_lane_bf16,"ptr: *mut bfloat16_t, val: bfloat16x4x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3_lane_f16,"ptr: *mut float16_t, val: float16x4x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3_lane_f32,"ptr: *mut f32, val: float32x2x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3_lane_f64,"ptr: *mut float64_t, val: float64x1x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3_lane_p16,"ptr: *mut poly16_t, val: poly16x4x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3_lane_p64,"ptr: *mut poly64_t, val: poly64x1x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3_lane_p8,"ptr: *mut poly8_t, val: poly8x8x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3_lane_s16,"ptr: *mut i16, val: int16x4x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3_lane_s32,"ptr: *mut i32, val: int32x2x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3_lane_s64,"ptr: *mut i64, val: int64x1x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3_lane_s8,"ptr: *mut i8, val: int8x8x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3_lane_u16,"ptr: *mut u16, val: uint16x4x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3_lane_u32,"ptr: *mut u32, val: uint32x2x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3_lane_u64,"ptr: *mut u64, val: uint64x1x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3_lane_u8,"ptr: *mut u8, val: uint8x8x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3_p16,"ptr: *mut poly16_t, val: poly16x4x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3_p64,"ptr: *mut poly64_t, val: poly64x1x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst3_p8,"ptr: *mut poly8_t, val: poly8x8x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3_s16,"ptr: *mut i16, val: int16x4x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3_s32,"ptr: *mut i32, val: int32x2x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3_s64,"ptr: *mut i64, val: int64x1x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst3_s8,"ptr: *mut i8, val: int8x8x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3_u16,"ptr: *mut u16, val: uint16x4x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3_u32,"ptr: *mut u32, val: uint32x2x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3_u64,"ptr: *mut u64, val: uint64x1x3_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst3_u8,"ptr: *mut u8, val: uint8x8x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_bf16,"ptr: *mut bfloat16_t, val: bfloat16x8x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_f16,"ptr: *mut float16_t, val: float16x8x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_f32,"ptr: *mut f32, val: float32x4x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_f64,"ptr: *mut float64_t, val: float64x2x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_lane_bf16,"ptr: *mut bfloat16_t, val: bfloat16x8x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_lane_f16,"ptr: *mut float16_t, val: float16x8x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_lane_f32,"ptr: *mut f32, val: float32x4x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_lane_f64,"ptr: *mut float64_t, val: float64x2x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_lane_p16,"ptr: *mut poly16_t, val: poly16x8x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_lane_p64,"ptr: *mut poly64_t, val: poly64x2x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_lane_p8,"ptr: *mut poly8_t, val: poly8x16x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_lane_s16,"ptr: *mut i16, val: int16x8x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_lane_s32,"ptr: *mut i32, val: int32x4x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_lane_s64,"ptr: *mut i64, val: int64x2x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_lane_s8,"ptr: *mut i8, val: int8x16x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_lane_u16,"ptr: *mut u16, val: uint16x8x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_lane_u32,"ptr: *mut u32, val: uint32x4x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_lane_u64,"ptr: *mut u64, val: uint64x2x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_lane_u8,"ptr: *mut u8, val: uint8x16x3_t, lane: const int",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_p16,"ptr: *mut poly16_t, val: poly16x8x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_p64,"ptr: *mut poly64_t, val: poly64x2x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_p8,"ptr: *mut poly8_t, val: poly8x16x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_s16,"ptr: *mut i16, val: int16x8x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_s32,"ptr: *mut i32, val: int32x4x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_s64,"ptr: *mut i64, val: int64x2x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_s8,"ptr: *mut i8, val: int8x16x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_u16,"ptr: *mut u16, val: uint16x8x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_u32,"ptr: *mut u32, val: uint32x4x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_u64,"ptr: *mut u64, val: uint64x2x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst3q_u8,"ptr: *mut u8, val: uint8x16x3_t",void,Store multiple 3-element structures from three registers
+FALSE,vst4_bf16,"ptr: *mut bfloat16_t, val: bfloat16x4x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4_f16,"ptr: *mut float16_t, val: float16x4x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4_f32,"ptr: *mut f32, val: float32x2x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4_f64,"ptr: *mut float64_t, val: float64x1x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst4_lane_bf16,"ptr: *mut bfloat16_t, val: bfloat16x4x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4_lane_f16,"ptr: *mut float16_t, val: float16x4x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4_lane_f32,"ptr: *mut f32, val: float32x2x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4_lane_f64,"ptr: *mut float64_t, val: float64x1x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4_lane_p16,"ptr: *mut poly16_t, val: poly16x4x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4_lane_p64,"ptr: *mut poly64_t, val: poly64x1x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4_lane_p8,"ptr: *mut poly8_t, val: poly8x8x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4_lane_s16,"ptr: *mut i16, val: int16x4x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4_lane_s32,"ptr: *mut i32, val: int32x2x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4_lane_s64,"ptr: *mut i64, val: int64x1x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4_lane_s8,"ptr: *mut i8, val: int8x8x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4_lane_u16,"ptr: *mut u16, val: uint16x4x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4_lane_u32,"ptr: *mut u32, val: uint32x2x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4_lane_u64,"ptr: *mut u64, val: uint64x1x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4_lane_u8,"ptr: *mut u8, val: uint8x8x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4_p16,"ptr: *mut poly16_t, val: poly16x4x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4_p64,"ptr: *mut poly64_t, val: poly64x1x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst4_p8,"ptr: *mut poly8_t, val: poly8x8x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4_s16,"ptr: *mut i16, val: int16x4x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4_s32,"ptr: *mut i32, val: int32x2x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4_s64,"ptr: *mut i64, val: int64x1x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst4_s8,"ptr: *mut i8, val: int8x8x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4_u16,"ptr: *mut u16, val: uint16x4x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4_u32,"ptr: *mut u32, val: uint32x2x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4_u64,"ptr: *mut u64, val: uint64x1x4_t",void,"Store multiple single-element structures from one, two, three, or four registers"
+FALSE,vst4_u8,"ptr: *mut u8, val: uint8x8x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_bf16,"ptr: *mut bfloat16_t, val: bfloat16x8x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_f16,"ptr: *mut float16_t, val: float16x8x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_f32,"ptr: *mut f32, val: float32x4x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_f64,"ptr: *mut float64_t, val: float64x2x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_lane_bf16,"ptr: *mut bfloat16_t, val: bfloat16x8x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_lane_f16,"ptr: *mut float16_t, val: float16x8x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_lane_f32,"ptr: *mut f32, val: float32x4x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_lane_f64,"ptr: *mut float64_t, val: float64x2x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_lane_p16,"ptr: *mut poly16_t, val: poly16x8x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_lane_p64,"ptr: *mut poly64_t, val: poly64x2x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_lane_p8,"ptr: *mut poly8_t, val: poly8x16x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_lane_s16,"ptr: *mut i16, val: int16x8x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_lane_s32,"ptr: *mut i32, val: int32x4x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_lane_s64,"ptr: *mut i64, val: int64x2x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_lane_s8,"ptr: *mut i8, val: int8x16x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_lane_u16,"ptr: *mut u16, val: uint16x8x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_lane_u32,"ptr: *mut u32, val: uint32x4x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_lane_u64,"ptr: *mut u64, val: uint64x2x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_lane_u8,"ptr: *mut u8, val: uint8x16x4_t, lane: const int",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_p16,"ptr: *mut poly16_t, val: poly16x8x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_p64,"ptr: *mut poly64_t, val: poly64x2x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_p8,"ptr: *mut poly8_t, val: poly8x16x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_s16,"ptr: *mut i16, val: int16x8x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_s32,"ptr: *mut i32, val: int32x4x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_s64,"ptr: *mut i64, val: int64x2x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_s8,"ptr: *mut i8, val: int8x16x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_u16,"ptr: *mut u16, val: uint16x8x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_u32,"ptr: *mut u32, val: uint32x4x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_u64,"ptr: *mut u64, val: uint64x2x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vst4q_u8,"ptr: *mut u8, val: uint8x16x4_t",void,Store multiple 4-element structures from four registers
+FALSE,vstrq_p128,"ptr: *mut poly128_t, val: poly128_t",void,Store SIMD&FP register (immediate offset)
+FALSE,vsub_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Floating-point subtract
+TRUE,vsub_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Floating-point subtract
+TRUE,vsub_f64,"a: float64x1_t, b: float64x1_t",float64x1_t,Floating-point subtract
+TRUE,vsub_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Subtract
+TRUE,vsub_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Subtract
+TRUE,vsub_s64,"a: int64x1_t, b: int64x1_t",int64x1_t,Subtract
+TRUE,vsub_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Subtract
+TRUE,vsub_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Subtract
+TRUE,vsub_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Subtract
+TRUE,vsub_u64,"a: uint64x1_t, b: uint64x1_t",uint64x1_t,Subtract
+TRUE,vsub_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Subtract
+FALSE,vsubd_s64,"a: i64, b: i64",i64,Subtract
+FALSE,vsubd_u64,"a: u64, b: u64",u64,Subtract
+FALSE,vsubh_f16,"a: float16_t, b: float16_t",float16_t,Floating-point subtract
+TRUE,vsubhn_high_s16,"r: int8x8_t, a: int16x8_t, b: int16x8_t",int8x16_t,Subtract returning high narrow
+TRUE,vsubhn_high_s32,"r: int16x4_t, a: int32x4_t, b: int32x4_t",int16x8_t,Subtract returning high narrow
+TRUE,vsubhn_high_s64,"r: int32x2_t, a: int64x2_t, b: int64x2_t",int32x4_t,Subtract returning high narrow
+TRUE,vsubhn_high_u16,"r: uint8x8_t, a: uint16x8_t, b: uint16x8_t",uint8x16_t,Subtract returning high narrow
+TRUE,vsubhn_high_u32,"r: uint16x4_t, a: uint32x4_t, b: uint32x4_t",uint16x8_t,Subtract returning high narrow
+TRUE,vsubhn_high_u64,"r: uint32x2_t, a: uint64x2_t, b: uint64x2_t",uint32x4_t,Subtract returning high narrow
+TRUE,vsubhn_s16,"a: int16x8_t, b: int16x8_t",int8x8_t,Subtract returning high narrow
+TRUE,vsubhn_s32,"a: int32x4_t, b: int32x4_t",int16x4_t,Subtract returning high narrow
+TRUE,vsubhn_s64,"a: int64x2_t, b: int64x2_t",int32x2_t,Subtract returning high narrow
+TRUE,vsubhn_u16,"a: uint16x8_t, b: uint16x8_t",uint8x8_t,Subtract returning high narrow
+TRUE,vsubhn_u32,"a: uint32x4_t, b: uint32x4_t",uint16x4_t,Subtract returning high narrow
+TRUE,vsubhn_u64,"a: uint64x2_t, b: uint64x2_t",uint32x2_t,Subtract returning high narrow
+TRUE,vsubl_high_s16,"a: int16x8_t, b: int16x8_t",int32x4_t,Signed subtract long
+TRUE,vsubl_high_s32,"a: int32x4_t, b: int32x4_t",int64x2_t,Signed subtract long
+TRUE,vsubl_high_s8,"a: int8x16_t, b: int8x16_t",int16x8_t,Signed subtract long
+TRUE,vsubl_high_u16,"a: uint16x8_t, b: uint16x8_t",uint32x4_t,Unsigned subtract long
+TRUE,vsubl_high_u32,"a: uint32x4_t, b: uint32x4_t",uint64x2_t,Unsigned subtract long
+TRUE,vsubl_high_u8,"a: uint8x16_t, b: uint8x16_t",uint16x8_t,Unsigned subtract long
+TRUE,vsubl_s16,"a: int16x4_t, b: int16x4_t",int32x4_t,Signed subtract long
+TRUE,vsubl_s32,"a: int32x2_t, b: int32x2_t",int64x2_t,Signed subtract long
+TRUE,vsubl_s8,"a: int8x8_t, b: int8x8_t",int16x8_t,Signed subtract long
+TRUE,vsubl_u16,"a: uint16x4_t, b: uint16x4_t",uint32x4_t,Unsigned subtract long
+TRUE,vsubl_u32,"a: uint32x2_t, b: uint32x2_t",uint64x2_t,Unsigned subtract long
+TRUE,vsubl_u8,"a: uint8x8_t, b: uint8x8_t",uint16x8_t,Unsigned subtract long
+FALSE,vsubq_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Floating-point subtract
+TRUE,vsubq_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Floating-point subtract
+TRUE,vsubq_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Floating-point subtract
+TRUE,vsubq_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Subtract
+TRUE,vsubq_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Subtract
+TRUE,vsubq_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,Subtract
+TRUE,vsubq_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Subtract
+TRUE,vsubq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Subtract
+TRUE,vsubq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Subtract
+TRUE,vsubq_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Subtract
+TRUE,vsubq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Subtract
+TRUE,vsubw_high_s16,"a: int32x4_t, b: int16x8_t",int32x4_t,Signed subtract wide
+TRUE,vsubw_high_s32,"a: int64x2_t, b: int32x4_t",int64x2_t,Signed subtract wide
+TRUE,vsubw_high_s8,"a: int16x8_t, b: int8x16_t",int16x8_t,Signed subtract wide
+TRUE,vsubw_high_u16,"a: uint32x4_t, b: uint16x8_t",uint32x4_t,Unsigned subtract wide
+TRUE,vsubw_high_u32,"a: uint64x2_t, b: uint32x4_t",uint64x2_t,Unsigned subtract wide
+TRUE,vsubw_high_u8,"a: uint16x8_t, b: uint8x16_t",uint16x8_t,Unsigned subtract wide
+TRUE,vsubw_s16,"a: int32x4_t, b: int16x4_t",int32x4_t,Signed subtract wide
+TRUE,vsubw_s32,"a: int64x2_t, b: int32x2_t",int64x2_t,Signed subtract wide
+TRUE,vsubw_s8,"a: int16x8_t, b: int8x8_t",int16x8_t,Signed subtract wide
+TRUE,vsubw_u16,"a: uint32x4_t, b: uint16x4_t",uint32x4_t,Unsigned subtract wide
+TRUE,vsubw_u32,"a: uint64x2_t, b: uint32x2_t",uint64x2_t,Unsigned subtract wide
+TRUE,vsubw_u8,"a: uint16x8_t, b: uint8x8_t",uint16x8_t,Unsigned subtract wide
+FALSE,vsudot_lane_s32,"r: int32x2_t, a: int8x8_t, b: uint8x8_t, lane: const int",int32x2_t,Dot product index form with signed and unsigned integers
+FALSE,vsudot_laneq_s32,"r: int32x2_t, a: int8x8_t, b: uint8x16_t, lane: const int",int32x2_t,Dot product index form with signed and unsigned integers
+FALSE,vsudotq_lane_s32,"r: int32x4_t, a: int8x16_t, b: uint8x8_t, lane: const int",int32x4_t,Dot product index form with signed and unsigned integers
+FALSE,vsudotq_laneq_s32,"r: int32x4_t, a: int8x16_t, b: uint8x16_t, lane: const int",int32x4_t,Dot product index form with signed and unsigned integers
+TRUE,vtbl1_p8,"a: poly8x8_t, b: uint8x8_t",poly8x8_t,Table vector lookup
+TRUE,vtbl1_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Table vector lookup
+TRUE,vtbl1_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Table vector lookup
+TRUE,vtbl2_p8,"a: poly8x8x2_t, b: uint8x8_t",poly8x8_t,Table vector lookup
+TRUE,vtbl2_s8,"a: int8x8x2_t, b: int8x8_t",int8x8_t,Table vector lookup
+TRUE,vtbl2_u8,"a: uint8x8x2_t, b: uint8x8_t",uint8x8_t,Table vector lookup
+TRUE,vtbl3_p8,"a: poly8x8x3_t, b: uint8x8_t",poly8x8_t,Table vector lookup
+TRUE,vtbl3_s8,"a: int8x8x3_t, b: int8x8_t",int8x8_t,Table vector lookup
+TRUE,vtbl3_u8,"a: uint8x8x3_t, b: uint8x8_t",uint8x8_t,Table vector lookup
+TRUE,vtbl4_p8,"a: poly8x8x4_t, b: uint8x8_t",poly8x8_t,Table vector lookup
+TRUE,vtbl4_s8,"a: int8x8x4_t, b: int8x8_t",int8x8_t,Table vector lookup
+TRUE,vtbl4_u8,"a: uint8x8x4_t, b: uint8x8_t",uint8x8_t,Table vector lookup
+TRUE,vtbx1_p8,"a: poly8x8_t, b: poly8x8_t, c: uint8x8_t",poly8x8_t,Bitwise insert if false
+TRUE,vtbx1_s8,"a: int8x8_t, b: int8x8_t, c: int8x8_t",int8x8_t,Bitwise insert if false
+TRUE,vtbx1_u8,"a: uint8x8_t, b: uint8x8_t, c: uint8x8_t",uint8x8_t,Bitwise insert if false
+TRUE,vtbx2_p8,"a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t",poly8x8_t,Table vector lookup extension
+TRUE,vtbx2_s8,"a: int8x8_t, b: int8x8x2_t, c: int8x8_t",int8x8_t,Table vector lookup extension
+TRUE,vtbx2_u8,"a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t",uint8x8_t,Table vector lookup extension
+TRUE,vtbx3_p8,"a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t",poly8x8_t,Bitwise insert if false
+TRUE,vtbx3_s8,"a: int8x8_t, b: int8x8x3_t, c: int8x8_t",int8x8_t,Bitwise insert if false
+TRUE,vtbx3_u8,"a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t",uint8x8_t,Bitwise insert if false
+TRUE,vtbx4_p8,"a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t",poly8x8_t,Table vector lookup extension
+TRUE,vtbx4_s8,"a: int8x8_t, b: int8x8x4_t, c: int8x8_t",int8x8_t,Table vector lookup extension
+TRUE,vtbx4_u8,"a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t",uint8x8_t,Table vector lookup extension
+FALSE,vtrn_f16,"a: float16x4_t, b: float16x4_t",float16x4x2_t,Transpose elements
+FALSE,vtrn_f32,"a: float32x2_t, b: float32x2_t",float32x2x2_t,Transpose elements
+FALSE,vtrn_p16,"a: poly16x4_t, b: poly16x4_t",poly16x4x2_t,Transpose elements
+FALSE,vtrn_p8,"a: poly8x8_t, b: poly8x8_t",poly8x8x2_t,Transpose elements
+FALSE,vtrn_s16,"a: int16x4_t, b: int16x4_t",int16x4x2_t,Transpose elements
+FALSE,vtrn_s32,"a: int32x2_t, b: int32x2_t",int32x2x2_t,Transpose elements
+FALSE,vtrn_s8,"a: int8x8_t, b: int8x8_t",int8x8x2_t,Transpose elements
+FALSE,vtrn_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4x2_t,Transpose elements
+FALSE,vtrn_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2x2_t,Transpose elements
+FALSE,vtrn_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8x2_t,Transpose elements
+FALSE,vtrn1_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Transpose vectors
+TRUE,vtrn1_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Transpose vectors
+TRUE,vtrn1_p16,"a: poly16x4_t, b: poly16x4_t",poly16x4_t,Transpose vectors
+TRUE,vtrn1_p8,"a: poly8x8_t, b: poly8x8_t",poly8x8_t,Transpose vectors
+TRUE,vtrn1_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Transpose vectors
+TRUE,vtrn1_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Transpose vectors
+TRUE,vtrn1_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Transpose vectors
+TRUE,vtrn1_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Transpose vectors
+TRUE,vtrn1_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Transpose vectors
+TRUE,vtrn1_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Transpose vectors
+FALSE,vtrn1q_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Transpose vectors
+TRUE,vtrn1q_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Transpose vectors
+TRUE,vtrn1q_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Transpose vectors
+TRUE,vtrn1q_p16,"a: poly16x8_t, b: poly16x8_t",poly16x8_t,Transpose vectors
+TRUE,vtrn1q_p64,"a: poly64x2_t, b: poly64x2_t",poly64x2_t,Transpose vectors
+TRUE,vtrn1q_p8,"a: poly8x16_t, b: poly8x16_t",poly8x16_t,Transpose vectors
+TRUE,vtrn1q_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Transpose vectors
+TRUE,vtrn1q_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Transpose vectors
+TRUE,vtrn1q_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,Transpose vectors
+TRUE,vtrn1q_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Transpose vectors
+TRUE,vtrn1q_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Transpose vectors
+TRUE,vtrn1q_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Transpose vectors
+TRUE,vtrn1q_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Transpose vectors
+TRUE,vtrn1q_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Transpose vectors
+FALSE,vtrn2_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Transpose vectors
+TRUE,vtrn2_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Transpose vectors
+TRUE,vtrn2_p16,"a: poly16x4_t, b: poly16x4_t",poly16x4_t,Transpose vectors
+TRUE,vtrn2_p8,"a: poly8x8_t, b: poly8x8_t",poly8x8_t,Transpose vectors
+TRUE,vtrn2_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Transpose vectors
+TRUE,vtrn2_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Transpose vectors
+TRUE,vtrn2_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Transpose vectors
+TRUE,vtrn2_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Transpose vectors
+TRUE,vtrn2_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Transpose vectors
+TRUE,vtrn2_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Transpose vectors
+FALSE,vtrn2q_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Transpose vectors
+TRUE,vtrn2q_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Transpose vectors
+TRUE,vtrn2q_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Transpose vectors
+TRUE,vtrn2q_p16,"a: poly16x8_t, b: poly16x8_t",poly16x8_t,Transpose vectors
+TRUE,vtrn2q_p64,"a: poly64x2_t, b: poly64x2_t",poly64x2_t,Transpose vectors
+TRUE,vtrn2q_p8,"a: poly8x16_t, b: poly8x16_t",poly8x16_t,Transpose vectors
+TRUE,vtrn2q_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Transpose vectors
+TRUE,vtrn2q_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Transpose vectors
+TRUE,vtrn2q_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,Transpose vectors
+TRUE,vtrn2q_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Transpose vectors
+TRUE,vtrn2q_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Transpose vectors
+TRUE,vtrn2q_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Transpose vectors
+TRUE,vtrn2q_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Transpose vectors
+TRUE,vtrn2q_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Transpose vectors
+FALSE,vtrnq_f16,"a: float16x8_t, b: float16x8_t",float16x8x2_t,Transpose elements
+FALSE,vtrnq_f32,"a: float32x4_t, b: float32x4_t",float32x4x2_t,Transpose elements
+FALSE,vtrnq_p16,"a: poly16x8_t, b: poly16x8_t",poly16x8x2_t,Transpose elements
+FALSE,vtrnq_p8,"a: poly8x16_t, b: poly8x16_t",poly8x16x2_t,Transpose elements
+FALSE,vtrnq_s16,"a: int16x8_t, b: int16x8_t",int16x8x2_t,Transpose elements
+FALSE,vtrnq_s32,"a: int32x4_t, b: int32x4_t",int32x4x2_t,Transpose elements
+FALSE,vtrnq_s8,"a: int8x16_t, b: int8x16_t",int8x16x2_t,Transpose elements
+FALSE,vtrnq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8x2_t,Transpose elements
+FALSE,vtrnq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4x2_t,Transpose elements
+FALSE,vtrnq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16x2_t,Transpose elements
+TRUE,vtst_p64,"a: poly64x1_t, b: poly64x1_t",uint64x1_t,Compare bitwise test bits nonzero
+TRUE,vtst_p8,"a: poly8x8_t, b: poly8x8_t",uint8x8_t,Compare bitwise test bits nonzero
+TRUE,vtst_s16,"a: int16x4_t, b: int16x4_t",uint16x4_t,Compare bitwise test bits nonzero
+TRUE,vtst_s32,"a: int32x2_t, b: int32x2_t",uint32x2_t,Compare bitwise test bits nonzero
+TRUE,vtst_s64,"a: int64x1_t, b: int64x1_t",uint64x1_t,Compare bitwise test bits nonzero
+TRUE,vtst_s8,"a: int8x8_t, b: int8x8_t",uint8x8_t,Compare bitwise test bits nonzero
+TRUE,vtst_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Compare bitwise test bits nonzero
+TRUE,vtst_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Compare bitwise test bits nonzero
+TRUE,vtst_u64,"a: uint64x1_t, b: uint64x1_t",uint64x1_t,Compare bitwise test bits nonzero
+TRUE,vtst_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Compare bitwise test bits nonzero
+FALSE,vtstd_s64,"a: i64, b: i64",u64,Compare bitwise test bits nonzero
+FALSE,vtstd_u64,"a: u64, b: u64",u64,Compare bitwise test bits nonzero
+TRUE,vtstq_p64,"a: poly64x2_t, b: poly64x2_t",uint64x2_t,Compare bitwise test bits nonzero
+TRUE,vtstq_p8,"a: poly8x16_t, b: poly8x16_t",uint8x16_t,Compare bitwise test bits nonzero
+TRUE,vtstq_s16,"a: int16x8_t, b: int16x8_t",uint16x8_t,Compare bitwise test bits nonzero
+TRUE,vtstq_s32,"a: int32x4_t, b: int32x4_t",uint32x4_t,Compare bitwise test bits nonzero
+TRUE,vtstq_s64,"a: int64x2_t, b: int64x2_t",uint64x2_t,Compare bitwise test bits nonzero
+TRUE,vtstq_s8,"a: int8x16_t, b: int8x16_t",uint8x16_t,Compare bitwise test bits nonzero
+TRUE,vtstq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Compare bitwise test bits nonzero
+TRUE,vtstq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Compare bitwise test bits nonzero
+TRUE,vtstq_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Compare bitwise test bits nonzero
+TRUE,vtstq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Compare bitwise test bits nonzero
+FALSE,vuqadd_s16,"a: int16x4_t, b: uint16x4_t",int16x4_t,Signed saturating accumulate of unsigned value
+FALSE,vuqadd_s32,"a: int32x2_t, b: uint32x2_t",int32x2_t,Signed saturating accumulate of unsigned value
+FALSE,vuqadd_s64,"a: int64x1_t, b: uint64x1_t",int64x1_t,Signed saturating accumulate of unsigned value
+FALSE,vuqadd_s8,"a: int8x8_t, b: uint8x8_t",int8x8_t,Signed saturating accumulate of unsigned value
+FALSE,vuqaddb_s8,"a: i8, b: u8",i8,Signed saturating accumulate of unsigned value
+FALSE,vuqaddd_s64,"a: i64, b: u64",i64,Signed saturating accumulate of unsigned value
+FALSE,vuqaddh_s16,"a: i16, b: u16",i16,Signed saturating accumulate of unsigned value
+FALSE,vuqaddq_s16,"a: int16x8_t, b: uint16x8_t",int16x8_t,Signed saturating accumulate of unsigned value
+FALSE,vuqaddq_s32,"a: int32x4_t, b: uint32x4_t",int32x4_t,Signed saturating accumulate of unsigned value
+FALSE,vuqaddq_s64,"a: int64x2_t, b: uint64x2_t",int64x2_t,Signed saturating accumulate of unsigned value
+FALSE,vuqaddq_s8,"a: int8x16_t, b: uint8x16_t",int8x16_t,Signed saturating accumulate of unsigned value
+FALSE,vuqadds_s32,"a: i32, b: u32",i32,Signed saturating accumulate of unsigned value
+FALSE,vusdot_lane_s32,"r: int32x2_t, a: uint8x8_t, b: int8x8_t, lane: const int",int32x2_t,Dot product vector form with unsigned and signed integers
+FALSE,vusdot_laneq_s32,"r: int32x2_t, a: uint8x8_t, b: int8x16_t, lane: const int",int32x2_t,Dot product vector form with unsigned and signed integers
+FALSE,vusdot_s32,"r: int32x2_t, a: uint8x8_t, b: int8x8_t",int32x2_t,Dot product vector form with unsigned and signed integers
+FALSE,vusdotq_lane_s32,"r: int32x4_t, a: uint8x16_t, b: int8x8_t, lane: const int",int32x4_t,Dot product vector form with unsigned and signed integers
+FALSE,vusdotq_laneq_s32,"r: int32x4_t, a: uint8x16_t, b: int8x16_t, lane: const int",int32x4_t,Dot product vector form with unsigned and signed integers
+FALSE,vusdotq_s32,"r: int32x4_t, a: uint8x16_t, b: int8x16_t",int32x4_t,Dot product vector form with unsigned and signed integers
+FALSE,vusmmlaq_s32,"r: int32x4_t, a: uint8x16_t, b: int8x16_t",int32x4_t,Unsigned and signed 8-bit integer matrix multiply-accumulate
+FALSE,vuzp_f16,"a: float16x4_t, b: float16x4_t",float16x4x2_t,Unzip vectors
+FALSE,vuzp_f32,"a: float32x2_t, b: float32x2_t",float32x2x2_t,Unzip vectors
+FALSE,vuzp_p16,"a: poly16x4_t, b: poly16x4_t",poly16x4x2_t,Unzip vectors
+FALSE,vuzp_p8,"a: poly8x8_t, b: poly8x8_t",poly8x8x2_t,Unzip vectors
+FALSE,vuzp_s16,"a: int16x4_t, b: int16x4_t",int16x4x2_t,Unzip vectors
+FALSE,vuzp_s32,"a: int32x2_t, b: int32x2_t",int32x2x2_t,Unzip vectors
+FALSE,vuzp_s8,"a: int8x8_t, b: int8x8_t",int8x8x2_t,Unzip vectors
+FALSE,vuzp_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4x2_t,Unzip vectors
+FALSE,vuzp_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2x2_t,Unzip vectors
+FALSE,vuzp_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8x2_t,Unzip vectors
+FALSE,vuzp1_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Unzip vectors
+TRUE,vuzp1_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Unzip vectors
+FALSE,vuzp1_p16,"a: poly16x4_t, b: poly16x4_t",poly16x4_t,Unzip vectors
+TRUE,vuzp1_p8,"a: poly8x8_t, b: poly8x8_t",poly8x8_t,Unzip vectors
+TRUE,vuzp1_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Unzip vectors
+TRUE,vuzp1_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Unzip vectors
+TRUE,vuzp1_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Unzip vectors
+TRUE,vuzp1_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Unzip vectors
+TRUE,vuzp1_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Unzip vectors
+TRUE,vuzp1_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Unzip vectors
+FALSE,vuzp1q_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Unzip vectors
+TRUE,vuzp1q_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Unzip vectors
+TRUE,vuzp1q_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Unzip vectors
+FALSE,vuzp1q_p16,"a: poly16x8_t, b: poly16x8_t",poly16x8_t,Unzip vectors
+TRUE,vuzp1q_p64,"a: poly64x2_t, b: poly64x2_t",poly64x2_t,Unzip vectors
+FALSE,vuzp1q_p8,"a: poly8x16_t, b: poly8x16_t",poly8x16_t,Unzip vectors
+TRUE,vuzp1q_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Unzip vectors
+TRUE,vuzp1q_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Unzip vectors
+TRUE,vuzp1q_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,Unzip vectors
+TRUE,vuzp1q_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Unzip vectors
+TRUE,vuzp1q_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Unzip vectors
+TRUE,vuzp1q_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Unzip vectors
+TRUE,vuzp1q_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Unzip vectors
+TRUE,vuzp1q_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Unzip vectors
+FALSE,vuzp2_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Unzip vectors
+TRUE,vuzp2_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Unzip vectors
+FALSE,vuzp2_p16,"a: poly16x4_t, b: poly16x4_t",poly16x4_t,Unzip vectors
+TRUE,vuzp2_p8,"a: poly8x8_t, b: poly8x8_t",poly8x8_t,Unzip vectors
+TRUE,vuzp2_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Unzip vectors
+TRUE,vuzp2_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Unzip vectors
+TRUE,vuzp2_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Unzip vectors
+TRUE,vuzp2_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Unzip vectors
+TRUE,vuzp2_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Unzip vectors
+TRUE,vuzp2_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Unzip vectors
+FALSE,vuzp2q_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Unzip vectors
+TRUE,vuzp2q_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Unzip vectors
+TRUE,vuzp2q_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Unzip vectors
+FALSE,vuzp2q_p16,"a: poly16x8_t, b: poly16x8_t",poly16x8_t,Unzip vectors
+TRUE,vuzp2q_p64,"a: poly64x2_t, b: poly64x2_t",poly64x2_t,Unzip vectors
+FALSE,vuzp2q_p8,"a: poly8x16_t, b: poly8x16_t",poly8x16_t,Unzip vectors
+TRUE,vuzp2q_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Unzip vectors
+TRUE,vuzp2q_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Unzip vectors
+TRUE,vuzp2q_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,Unzip vectors
+TRUE,vuzp2q_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Unzip vectors
+TRUE,vuzp2q_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Unzip vectors
+TRUE,vuzp2q_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Unzip vectors
+TRUE,vuzp2q_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Unzip vectors
+TRUE,vuzp2q_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Unzip vectors
+FALSE,vuzpq_f16,"a: float16x8_t, b: float16x8_t",float16x8x2_t,Unzip vectors
+FALSE,vuzpq_f32,"a: float32x4_t, b: float32x4_t",float32x4x2_t,Unzip vectors
+FALSE,vuzpq_p16,"a: poly16x8_t, b: poly16x8_t",poly16x8x2_t,Unzip vectors
+FALSE,vuzpq_p8,"a: poly8x16_t, b: poly8x16_t",poly8x16x2_t,Unzip vectors
+FALSE,vuzpq_s16,"a: int16x8_t, b: int16x8_t",int16x8x2_t,Unzip vectors
+FALSE,vuzpq_s32,"a: int32x4_t, b: int32x4_t",int32x4x2_t,Unzip vectors
+FALSE,vuzpq_s8,"a: int8x16_t, b: int8x16_t",int8x16x2_t,Unzip vectors
+FALSE,vuzpq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8x2_t,Unzip vectors
+FALSE,vuzpq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4x2_t,Unzip vectors
+FALSE,vuzpq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16x2_t,Unzip vectors
+FALSE,vxarq_u64,"a: uint64x2_t, b: uint64x2_t, imm6: const int",uint64x2_t,Exclusive OR and rotate
+FALSE,vzip_f16,"a: float16x4_t, b: float16x4_t",float16x4x2_t,Zip vectors
+FALSE,vzip_f32,"a: float32x2_t, b: float32x2_t",float32x2x2_t,Zip vectors
+FALSE,vzip_p16,"a: poly16x4_t, b: poly16x4_t",poly16x4x2_t,Zip vectors
+FALSE,vzip_p8,"a: poly8x8_t, b: poly8x8_t",poly8x8x2_t,Zip vectors
+FALSE,vzip_s16,"a: int16x4_t, b: int16x4_t",int16x4x2_t,Zip vectors
+FALSE,vzip_s32,"a: int32x2_t, b: int32x2_t",int32x2x2_t,Zip vectors
+FALSE,vzip_s8,"a: int8x8_t, b: int8x8_t",int8x8x2_t,Zip vectors
+FALSE,vzip_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4x2_t,Zip vectors
+FALSE,vzip_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2x2_t,Zip vectors
+FALSE,vzip_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8x2_t,Zip vectors
+FALSE,vzip1_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Zip vectors
+TRUE,vzip1_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Zip vectors
+TRUE,vzip1_p16,"a: poly16x4_t, b: poly16x4_t",poly16x4_t,Zip vectors
+TRUE,vzip1_p8,"a: poly8x8_t, b: poly8x8_t",poly8x8_t,Zip vectors
+TRUE,vzip1_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Zip vectors
+TRUE,vzip1_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Zip vectors
+TRUE,vzip1_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Zip vectors
+TRUE,vzip1_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Zip vectors
+TRUE,vzip1_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Zip vectors
+TRUE,vzip1_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Zip vectors
+FALSE,vzip1q_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Zip vectors
+TRUE,vzip1q_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Zip vectors
+TRUE,vzip1q_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Zip vectors
+TRUE,vzip1q_p16,"a: poly16x8_t, b: poly16x8_t",poly16x8_t,Zip vectors
+TRUE,vzip1q_p64,"a: poly64x2_t, b: poly64x2_t",poly64x2_t,Zip vectors
+TRUE,vzip1q_p8,"a: poly8x16_t, b: poly8x16_t",poly8x16_t,Zip vectors
+TRUE,vzip1q_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Zip vectors
+TRUE,vzip1q_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Zip vectors
+TRUE,vzip1q_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,Zip vectors
+TRUE,vzip1q_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Zip vectors
+TRUE,vzip1q_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Zip vectors
+TRUE,vzip1q_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Zip vectors
+TRUE,vzip1q_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Zip vectors
+TRUE,vzip1q_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Zip vectors
+FALSE,vzip2_f16,"a: float16x4_t, b: float16x4_t",float16x4_t,Zip vectors
+TRUE,vzip2_f32,"a: float32x2_t, b: float32x2_t",float32x2_t,Zip vectors
+TRUE,vzip2_p16,"a: poly16x4_t, b: poly16x4_t",poly16x4_t,Zip vectors
+TRUE,vzip2_p8,"a: poly8x8_t, b: poly8x8_t",poly8x8_t,Zip vectors
+TRUE,vzip2_s16,"a: int16x4_t, b: int16x4_t",int16x4_t,Zip vectors
+TRUE,vzip2_s32,"a: int32x2_t, b: int32x2_t",int32x2_t,Zip vectors
+TRUE,vzip2_s8,"a: int8x8_t, b: int8x8_t",int8x8_t,Zip vectors
+TRUE,vzip2_u16,"a: uint16x4_t, b: uint16x4_t",uint16x4_t,Zip vectors
+TRUE,vzip2_u32,"a: uint32x2_t, b: uint32x2_t",uint32x2_t,Zip vectors
+TRUE,vzip2_u8,"a: uint8x8_t, b: uint8x8_t",uint8x8_t,Zip vectors
+FALSE,vzip2q_f16,"a: float16x8_t, b: float16x8_t",float16x8_t,Zip vectors
+TRUE,vzip2q_f32,"a: float32x4_t, b: float32x4_t",float32x4_t,Zip vectors
+TRUE,vzip2q_f64,"a: float64x2_t, b: float64x2_t",float64x2_t,Zip vectors
+TRUE,vzip2q_p16,"a: poly16x8_t, b: poly16x8_t",poly16x8_t,Zip vectors
+TRUE,vzip2q_p64,"a: poly64x2_t, b: poly64x2_t",poly64x2_t,Zip vectors
+TRUE,vzip2q_p8,"a: poly8x16_t, b: poly8x16_t",poly8x16_t,Zip vectors
+TRUE,vzip2q_s16,"a: int16x8_t, b: int16x8_t",int16x8_t,Zip vectors
+TRUE,vzip2q_s32,"a: int32x4_t, b: int32x4_t",int32x4_t,Zip vectors
+TRUE,vzip2q_s64,"a: int64x2_t, b: int64x2_t",int64x2_t,Zip vectors
+TRUE,vzip2q_s8,"a: int8x16_t, b: int8x16_t",int8x16_t,Zip vectors
+TRUE,vzip2q_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8_t,Zip vectors
+TRUE,vzip2q_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4_t,Zip vectors
+TRUE,vzip2q_u64,"a: uint64x2_t, b: uint64x2_t",uint64x2_t,Zip vectors
+TRUE,vzip2q_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16_t,Zip vectors
+FALSE,vzipq_f16,"a: float16x8_t, b: float16x8_t",float16x8x2_t,Zip vectors
+FALSE,vzipq_f32,"a: float32x4_t, b: float32x4_t",float32x4x2_t,Zip vectors
+FALSE,vzipq_p16,"a: poly16x8_t, b: poly16x8_t",poly16x8x2_t,Zip vectors
+FALSE,vzipq_p8,"a: poly8x16_t, b: poly8x16_t",poly8x16x2_t,Zip vectors
+FALSE,vzipq_s16,"a: int16x8_t, b: int16x8_t",int16x8x2_t,Zip vectors
+FALSE,vzipq_s32,"a: int32x4_t, b: int32x4_t",int32x4x2_t,Zip vectors
+FALSE,vzipq_s8,"a: int8x16_t, b: int8x16_t",int8x16x2_t,Zip vectors
+FALSE,vzipq_u16,"a: uint16x8_t, b: uint16x8_t",uint16x8x2_t,Zip vectors
+FALSE,vzipq_u32,"a: uint32x4_t, b: uint32x4_t",uint32x4x2_t,Zip vectors
+FALSE,vzipq_u8,"a: uint8x16_t, b: uint8x16_t",uint8x16x2_t,Zip vectors
\ No newline at end of file
diff --git a/crates/intrinsic-test/src/main.rs b/crates/intrinsic-test/src/main.rs
index 22e1be5e4f..dec740151b 100644
--- a/crates/intrinsic-test/src/main.rs
+++ b/crates/intrinsic-test/src/main.rs
@@ -67,12 +67,6 @@ fn generate_rust_program(intrinsic: &Intrinsic) -> String {
 #![feature(stdsimd)]
 #![allow(overflowing_literals)]
 use core::arch::aarch64::*;
-#[allow(unused_macros)]
-macro_rules! bits_to_float(
-    ($t:ty, $bits:expr) => (
-        {{ let x: $t = ::std::mem::transmute($bits); x  }}
-    )
-);
 
 fn main() {{
 {passes}
@@ -86,14 +80,17 @@ fn main() {{
 }
 
 fn compile_c(c_filename: &str, intrinsic: &Intrinsic, compiler: &str) -> bool {
+    let flags = std::env::var("CPPFLAGS").unwrap_or("".into());
+
     let output = Command::new("sh")
         .arg("-c")
         .arg(format!(
-            "{cpp} -Wno-narrowing -O2 -target {target} -o c_programs/{intrinsic} {filename}",
+            "{cpp} {cppflags} -Wno-narrowing -O2 -target {target} -o c_programs/{intrinsic} {filename}",
             target = "aarch64-unknown-linux-gnu",
             filename = c_filename,
             intrinsic = intrinsic.name,
             cpp = compiler,
+            cppflags = flags,
         ))
         .output();
     if let Ok(output) = output {
@@ -182,8 +179,9 @@ path = "{intrinsic}/main.rs""#,
         .current_dir("rust_programs")
         .arg("-c")
         .arg(format!(
-            "cargo +{toolchain} build --release",
+            "cargo {toolchain} build --release --target {target}",
             toolchain = toolchain,
+            target = "aarch64-unknown-linux-gnu",
         ))
         .output();
     if let Ok(output) = output {
@@ -217,7 +215,6 @@ fn main() {
         .arg(
             Arg::with_name("TOOLCHAIN")
                 .takes_value(true)
-                .default_value("nightly")
                 .long("toolchain")
                 .help("The rust toolchain to use for building the rust code"),
         )
@@ -228,12 +225,21 @@ fn main() {
                 .long("cppcompiler")
                 .help("The C++ compiler to use for compiling the c++ code"),
         )
+        .arg(
+            Arg::with_name("RUNNER")
+                .takes_value(true)
+                .long("runner")
+                .help("Run the C programs under emulation with this command"),
+        )
         .get_matches();
 
     let filename = matches.value_of("INPUT").unwrap();
-    let toolchain = matches.value_of("TOOLCHAIN").unwrap();
-    let cpp_compiler = matches.value_of("CPPCOMPILER").unwrap();
+    let toolchain = matches
+        .value_of("TOOLCHAIN")
+        .map_or("".into(), |t| format!("+{}", t));
 
+    let cpp_compiler = matches.value_of("CPPCOMPILER").unwrap();
+    let c_runner = matches.value_of("RUNNER").unwrap_or("");
     let mut csv_reader = csv::Reader::from_reader(std::fs::File::open(filename).unwrap());
 
     let mut intrinsics = csv_reader
@@ -288,7 +294,7 @@ fn main() {
         std::process::exit(3);
     }
 
-    if !compare_outputs(&intrinsics, &toolchain) {
+    if !compare_outputs(&intrinsics, &toolchain, &c_runner) {
         std::process::exit(1)
     }
 }
@@ -299,14 +305,15 @@ enum FailureReason {
     Difference(String, String, String),
 }
 
-fn compare_outputs(intrinsics: &Vec<Intrinsic>, toolchain: &str) -> bool {
+fn compare_outputs(intrinsics: &Vec<Intrinsic>, toolchain: &str, runner: &str) -> bool {
     let intrinsics = intrinsics
         .par_iter()
         .filter_map(|intrinsic| {
             let c = Command::new("sh")
                 .arg("-c")
                 .arg(format!(
-                    "./c_programs/{intrinsic}",
+                    "{runner} ./c_programs/{intrinsic}",
+                    runner = runner,
                     intrinsic = intrinsic.name,
                 ))
                 .output();
@@ -314,9 +321,10 @@ fn compare_outputs(intrinsics: &Vec<Intrinsic>, toolchain: &str) -> bool {
                 .current_dir("rust_programs")
                 .arg("-c")
                 .arg(format!(
-                    "cargo +{toolchain} run --release --bin {intrinsic}",
+                    "cargo {toolchain} run --release --target {target} --bin {intrinsic}",
                     intrinsic = intrinsic.name,
                     toolchain = toolchain,
+                    target = "aarch64-unknown-linux-gnu",
                 ))
                 .output();
 
diff --git a/crates/intrinsic-test/src/types.rs b/crates/intrinsic-test/src/types.rs
index c037ed3ae5..cc5b68973b 100644
--- a/crates/intrinsic-test/src/types.rs
+++ b/crates/intrinsic-test/src/types.rs
@@ -366,10 +366,10 @@ impl IntrinsicType {
             } => (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1)))
                 .map(|i| {
                     format!(
-                        "{}{})",
+                        "{}({})",
                         match language {
-                            &Language::Rust => "bits_to_float!(f32, ",
-                            &Language::C => "cast<float, uint32_t>(",
+                            &Language::Rust => "f32::from_bits",
+                            &Language::C => "cast<float, uint32_t>",
                         },
                         values_for_pass(32, i, pass),
                     )
@@ -385,10 +385,10 @@ impl IntrinsicType {
             } => (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1)))
                 .map(|i| {
                     format!(
-                        "{}{}{})",
+                        "{}({}{})",
                         match language {
-                            &Language::Rust => "bits_to_float!(f64,",
-                            &Language::C => "cast<double, uint64_t>(",
+                            &Language::Rust => "f64::from_bits",
+                            &Language::C => "cast<double, uint64_t>",
                         },
                         values_for_pass(64, i, pass),
                         match language {

From 0ff70cc20cc03467ae748aea2aec4f2897d1a41c Mon Sep 17 00:00:00 2001
From: Jamie Cunliffe <Jamie.Cunliffe@arm.com>
Date: Wed, 21 Jul 2021 12:37:14 +0100
Subject: [PATCH 3/4] Enable crypto and crc feature for intrinsic testing

---
 crates/intrinsic-test/src/main.rs | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/crates/intrinsic-test/src/main.rs b/crates/intrinsic-test/src/main.rs
index dec740151b..2ee4635064 100644
--- a/crates/intrinsic-test/src/main.rs
+++ b/crates/intrinsic-test/src/main.rs
@@ -23,9 +23,9 @@ pub enum Language {
     C,
 }
 
-fn generate_c_program(header_file: &str, intrinsic: &Intrinsic) -> String {
+fn generate_c_program(header_files: &[&str], intrinsic: &Intrinsic) -> String {
     format!(
-        r#"#include <{header_file}>
+        r#"{header_files}
 #include <iostream>
 #include <cstring>
 #include <iomanip>
@@ -52,7 +52,11 @@ int main(int argc, char **argv) {{
 {passes}
     return 0;
 }}"#,
-        header_file = header_file,
+        header_files = header_files
+            .iter()
+            .map(|header| format!("#include <{}>", header))
+            .collect::<Vec<_>>()
+            .join("\n"),
         passes = (1..20)
             .map(|idx| intrinsic.generate_pass_c(idx))
             .collect::<Vec<_>>()
@@ -85,8 +89,9 @@ fn compile_c(c_filename: &str, intrinsic: &Intrinsic, compiler: &str) -> bool {
     let output = Command::new("sh")
         .arg("-c")
         .arg(format!(
-            "{cpp} {cppflags} -Wno-narrowing -O2 -target {target} -o c_programs/{intrinsic} {filename}",
+            "{cpp} {cppflags} {arch_flags} -Wno-narrowing -O2 -target {target} -o c_programs/{intrinsic} {filename}",
             target = "aarch64-unknown-linux-gnu",
+            arch_flags = "-march=armv8-a+crypto+crc",
             filename = c_filename,
             intrinsic = intrinsic.name,
             cpp = compiler,
@@ -125,7 +130,7 @@ fn build_c(intrinsics: &Vec<Intrinsic>, compiler: &str) -> bool {
             let c_filename = format!(r#"c_programs/{}.cpp"#, i.name);
             let mut file = File::create(&c_filename).unwrap();
 
-            let c_code = generate_c_program("arm_neon.h", &i);
+            let c_code = generate_c_program(&["arm_neon.h", "arm_acle.h"], &i);
             file.write_all(c_code.into_bytes().as_slice()).unwrap();
             compile_c(&c_filename, &i, compiler)
         })

From 6056cfc5348f17a89380854844a6d5d1c3c04018 Mon Sep 17 00:00:00 2001
From: Jamie Cunliffe <Jamie.Cunliffe@arm.com>
Date: Tue, 17 Aug 2021 16:00:55 +0100
Subject: [PATCH 4/4] Use local core_arch and switch to clang-12

---
 ci/docker/aarch64-unknown-linux-gnu/Dockerfile | 2 +-
 ci/run.sh                                      | 2 +-
 crates/intrinsic-test/src/main.rs              | 9 ++++++++-
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
index f0c1493ab8..2b43841907 100644
--- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
@@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
   qemu-user \
   make \
   file \
-  clang-9 \
+  clang-12 \
   lld
 
 ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
diff --git a/ci/run.sh b/ci/run.sh
index cd7a04d23f..e5e6aed1c5 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -115,7 +115,7 @@ esac
 
 if [ "${TARGET}" = "aarch64-unknown-linux-gnu" ]; then
     export CPPFLAGS="-fuse-ld=lld -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/9/aarch64-linux-gnu/"
-    cargo run ${INTRINSIC_TEST} --release --bin intrinsic-test -- crates/intrinsic-test/neon-intrinsics.csv --runner "${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}" --cppcompiler "clang++-9"
+    cargo run ${INTRINSIC_TEST} --release --bin intrinsic-test -- crates/intrinsic-test/neon-intrinsics.csv --runner "${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}" --cppcompiler "clang++-12"
 fi
 
 if [ "$NORUN" != "1" ] && [ "$NOSTD" != 1 ]; then
diff --git a/crates/intrinsic-test/src/main.rs b/crates/intrinsic-test/src/main.rs
index 2ee4635064..c5ff2b5487 100644
--- a/crates/intrinsic-test/src/main.rs
+++ b/crates/intrinsic-test/src/main.rs
@@ -70,7 +70,7 @@ fn generate_rust_program(intrinsic: &Intrinsic) -> String {
 #![feature(link_llvm_intrinsics)]
 #![feature(stdsimd)]
 #![allow(overflowing_literals)]
-use core::arch::aarch64::*;
+use core_arch::arch::aarch64::*;
 
 fn main() {{
 {passes}
@@ -159,6 +159,8 @@ version = "{version}"
 authors = ["{authors}"]
 edition = "2018"
 [workspace]
+[dependencies]
+core_arch = {{ path = "../crates/core_arch" }}
 {binaries}"#,
                 version = env!("CARGO_PKG_VERSION"),
                 authors = env!("CARGO_PKG_AUTHORS"),
@@ -288,6 +290,11 @@ fn main() {
                 .is_none()
         })
         .filter(|i| i.arguments.iter().find(|a| a.name == "n").is_none())
+        // clang-12 fails to compile this intrinsic due to an error.
+        // fatal error: error in backend: Cannot select: 0x2b99c30: i64 = AArch64ISD::VSHL Constant:i64<1>, Constant:i32<1>
+        // 0x2b9a520: i64 = Constant<1>
+        // 0x2b9a248: i32 = Constant<1>
+        .filter(|i| !["vshld_s64", "vshld_u64"].contains(&i.name.as_str()))
         .collect::<Vec<_>>();
     intrinsics.dedup();