Skip to content

Re-organize intrinsic-test to enable seamless addition of behaviour testing for more architectures #1758

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 22 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
8727056
Feat: Moved majority of the code to `arm` module.
madhav-madhusoodanan Mar 25, 2025
bef04d8
Chore: Added `SupportedArchitectureTest` trait which must be implemen…
madhav-madhusoodanan Mar 25, 2025
ce43975
chore: Added `ProcessedCli` to extract the logic to pre-process CLI s…
madhav-madhusoodanan Mar 26, 2025
2cdb2ac
chore: separated common logic within file creations, compile_c, compi…
madhav-madhusoodanan Mar 27, 2025
846ed3c
chore: code consolidation
madhav-madhusoodanan Mar 27, 2025
0baf79e
chore: added match block in `src/main.rs`
madhav-madhusoodanan Mar 27, 2025
d8b4a94
fixed `too many files open` issue
madhav-madhusoodanan Mar 30, 2025
4a4190e
maintaining special list of targets which need different execution co…
madhav-madhusoodanan Apr 2, 2025
8cf3cf6
rename struct for naming consistency
madhav-madhusoodanan Apr 2, 2025
c581afc
test commit to check if `load_Values_c` can be dissociated from targe…
madhav-madhusoodanan Apr 13, 2025
8ddbc03
added target field within `IntrinsicType` to perform target level che…
madhav-madhusoodanan Apr 14, 2025
1aede3e
Updated `Argument::from_c` to remove `ArgPrep` specific argument
madhav-madhusoodanan Apr 14, 2025
8740a0e
introduced generic types and code refactor
madhav-madhusoodanan Apr 16, 2025
dc4065f
Added a macro to simplify <Arch>IntrinsicType definitions
madhav-madhusoodanan Apr 16, 2025
b54d0b2
renamed `a64_only` data member in `Intrinsic` to `arch_tags`
madhav-madhusoodanan Apr 16, 2025
36f7e01
Removed aarch64-be specific execution command for rust test files
madhav-madhusoodanan Apr 17, 2025
ae42764
moved the C compilation commands into a struct for easier handling
madhav-madhusoodanan Apr 18, 2025
cb62771
Added dynamic dispatch for easier management of `<arch>ArchitectureTe…
madhav-madhusoodanan Apr 19, 2025
68959e3
code cleanup
madhav-madhusoodanan Apr 19, 2025
dc5c5a1
chore: file renaming
madhav-madhusoodanan Apr 23, 2025
030ce32
feat: made constraint common
madhav-madhusoodanan Apr 28, 2025
5ac0c71
fix: aarch64_be issues within compilation
madhav-madhusoodanan May 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions crates/intrinsic-test/src/arm/config.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/// Builds the notice header placed at the top of generated test files.
///
/// Every notice line is prefixed with `line_prefix` (for example a comment
/// marker such as `"// "`), and the block is terminated by one blank line.
pub fn build_notices(line_prefix: &str) -> String {
    const NOTICE_LINES: [&str; 3] = [
        "This is a transient test file, not intended for distribution. Some aspects of the",
        "test are derived from a JSON specification, published under the same license as the",
        "`intrinsic-test` crate.",
    ];

    let mut notice = String::new();
    for line in NOTICE_LINES {
        notice.push_str(line_prefix);
        notice.push_str(line);
        notice.push('\n');
    }
    // Blank separator line after the notice block.
    notice.push('\n');
    notice
}

/// C++ source for an `operator<<` overload that writes a `poly128_t` to a
/// `std::ostream`.
///
/// The emitted function peels off decimal digits with `% 10` / `/= 10` into a
/// temporary stream (least-significant digit first), reverses that text, and
/// writes the result to `os`. It is passed verbatim to the C program
/// generator by `generate_c_program_arm`.
pub const POLY128_OSTREAM_DEF: &str = r#"std::ostream& operator<<(std::ostream& os, poly128_t value) {
std::stringstream temp;
do {
int n = value % 10;
value /= 10;
temp << n;
} while (value != 0);
std::string tempstr(temp.str());
std::string res(tempstr.rbegin(), tempstr.rend());
os << res;
return os;
}"#;

/// Crate-level attribute lines handed to the Rust program generator by
/// `generate_rust_program_arm`.
///
/// Each line enables a `stdarch_*` feature gate; most are wrapped in
/// `cfg_attr` so they only apply when `target_arch` is `arm`, or when it is
/// `aarch64` / `arm64ec`. `stdarch_neon_f16` is enabled unconditionally.
pub const AARCH_CONFIGURATIONS: &str = r#"
#![cfg_attr(target_arch = "arm", feature(stdarch_arm_neon_intrinsics))]
#![cfg_attr(target_arch = "arm", feature(stdarch_aarch32_crc32))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fcma))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_dotprod))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_i8mm))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sha3))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))]
#![feature(stdarch_neon_f16)]
"#;
270 changes: 270 additions & 0 deletions crates/intrinsic-test/src/arm/functions.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
use super::config::{AARCH_CONFIGURATIONS, POLY128_OSTREAM_DEF, build_notices};
use super::intrinsic::ArmIntrinsicType;
use crate::common::argument::Argument;
use crate::common::compile_c::CompilationCommandBuilder;
use crate::common::gen_c::{compile_c, create_c_filenames, generate_c_program};
use crate::common::gen_rust::{compile_rust, create_rust_filenames, generate_rust_program};
use crate::common::indentation::Indentation;
use crate::common::intrinsic::{Intrinsic, IntrinsicDefinition};
use crate::common::intrinsic_helpers::IntrinsicTypeDefinition;
use crate::common::write_file;
use itertools::Itertools;
use rayon::prelude::*;

/// The number of times each intrinsic will be called.
///
/// Passed to both the argument-list generators and the loop generators for
/// the C and Rust test programs in this module.
const PASSES: u32 = 20;

/// Recursively generates the C test body for `intrinsic`.
///
/// Constraints are consumed from the end of `constraints`. For every value in
/// the last constrained argument's range, a braced scope is emitted that
/// declares the argument with that value and then contains the code generated
/// for the remaining constraints. Once no constraints are left, the call is
/// delegated to `generate_loop_c`.
///
/// `name` accumulates a `-{value}` suffix per recursion level and is handed to
/// `generate_loop_c` at the bottom of the recursion.
fn gen_code_c(
    indentation: Indentation,
    intrinsic: &Intrinsic<ArmIntrinsicType>,
    constraints: &[&Argument<ArmIntrinsicType>],
    name: String,
    target: &str,
) -> String {
    // Base case: nothing left to enumerate, emit the actual test loop.
    let Some((innermost, remaining)) = constraints.split_last() else {
        return intrinsic.generate_loop_c(indentation, &name, PASSES, target);
    };

    let inner_indent = indentation.nested();
    innermost
        .constraint
        .iter()
        .flat_map(|constraint| constraint.to_range())
        .map(|value| {
            format!(
                "{indentation}{{\n\
                 {inner_indent}{ty} {arg_name} = {value};\n\
                 {nested_body}\n\
                 {indentation}}}",
                arg_name = innermost.name,
                ty = innermost.ty.c_type(),
                nested_body = gen_code_c(
                    inner_indent,
                    intrinsic,
                    remaining,
                    format!("{name}-{value}"),
                    target,
                ),
            )
        })
        .join("\n")
}

/// Assembles the complete C test program for a single Arm intrinsic.
///
/// Collects the intrinsic's constrained arguments, generates the argument
/// lists and the (possibly nested) test body, and passes them together with
/// the notice header, `header_files`, and the `poly128_t` stream operator to
/// the common `generate_c_program`.
fn generate_c_program_arm(
    header_files: &[&str],
    intrinsic: &Intrinsic<ArmIntrinsicType>,
    target: &str,
) -> String {
    let constrained_args: Vec<_> = intrinsic
        .arguments
        .iter()
        .filter(|arg| arg.has_constraint())
        .collect();

    let base_indent = Indentation::default();
    let notices = build_notices("// ");
    let arglists = intrinsic.arguments.gen_arglists_c(base_indent, PASSES);
    let test_body = gen_code_c(
        base_indent.nested(),
        intrinsic,
        &constrained_args,
        String::new(),
        target,
    );

    generate_c_program(
        &notices,
        header_files,
        "aarch64",
        &[POLY128_OSTREAM_DEF],
        &arglists,
        &test_body,
    )
}

/// Recursively generates the Rust test body for `intrinsic`.
///
/// Constraints are consumed from the end of `constraints`. For every value in
/// the last constrained argument's range, a braced scope is emitted that
/// declares the argument as a `const` with that value and then contains the
/// code generated for the remaining constraints. Once no constraints are
/// left, the call is delegated to `generate_loop_rust`.
///
/// `name` accumulates a `-{value}` suffix per recursion level and is handed to
/// `generate_loop_rust` at the bottom of the recursion.
///
/// This function only builds a string; it deliberately writes nothing to
/// stdout (a leftover debug print of `name` on every call was removed).
fn gen_code_rust(
    indentation: Indentation,
    intrinsic: &Intrinsic<ArmIntrinsicType>,
    constraints: &[&Argument<ArmIntrinsicType>],
    name: String,
) -> String {
    if let Some((current, constraints)) = constraints.split_last() {
        let range = current
            .constraint
            .iter()
            .map(|c| c.to_range())
            .flat_map(|r| r.into_iter());

        let body_indentation = indentation.nested();
        range
            .map(|i| {
                format!(
                    "{indentation}{{\n\
                     {body_indentation}const {name}: {ty} = {val};\n\
                     {pass}\n\
                     {indentation}}}",
                    name = current.name,
                    ty = current.ty.rust_type(),
                    val = i,
                    // The inner `format!` sees the function parameter `name`,
                    // not the named argument of the outer `format!`.
                    pass = gen_code_rust(
                        body_indentation,
                        intrinsic,
                        constraints,
                        format!("{name}-{i}")
                    )
                )
            })
            .join("\n")
    } else {
        intrinsic.generate_loop_rust(indentation, &name, PASSES)
    }
}

/// Assembles the complete Rust test program for a single Arm intrinsic.
///
/// The architecture name handed to the common generator is `"arm"` when
/// `target` contains `"v7"` and `"aarch64"` otherwise.
fn generate_rust_program_arm(intrinsic: &Intrinsic<ArmIntrinsicType>, target: &str) -> String {
    let constrained_args: Vec<_> = intrinsic
        .arguments
        .iter()
        .filter(|arg| arg.has_constraint())
        .collect();

    let base_indent = Indentation::default();
    let arch = if target.contains("v7") {
        "arm"
    } else {
        "aarch64"
    };

    let notices = build_notices("// ");
    let arglists = intrinsic
        .arguments
        .gen_arglists_rust(base_indent.nested(), PASSES);
    let test_body = gen_code_rust(
        base_indent.nested(),
        intrinsic,
        &constrained_args,
        String::new(),
    );

    generate_rust_program(
        &notices,
        AARCH_CONFIGURATIONS,
        arch,
        &arglists,
        &test_body,
    )
}

/// Builds one C compilation command per intrinsic and hands them to
/// `compile_c`.
///
/// * `intrinsics_name_list` - names used as both input and output name of
///   each compilation command.
/// * `compiler` - compiler executable; anything not containing `"clang"`
///   additionally gets `-flax-vector-conversions`.
/// * `target` - target triple; targets containing `"v7"` skip the
///   `faminmax`/`lut`/`sha3` arch flags, and targets containing
///   `"aarch64_be"` get a dedicated linker and include paths.
/// * `cxx_toolchain_dir` - optional C++ toolchain root forwarded to the
///   command builder and used as the prefix for the `aarch64_be` linker path.
///
/// Returns whatever `compile_c` returns for the generated commands.
fn compile_c_arm(
    intrinsics_name_list: &[String],
    compiler: &str,
    target: &str,
    cxx_toolchain_dir: Option<&str>,
) -> bool {
    // -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations
    let mut command = CompilationCommandBuilder::new()
        .add_arch_flags(vec!["armv8.6-a", "crypto", "crc", "dotprod", "fp16"])
        .set_compiler(compiler)
        .set_target(target)
        .set_opt_level("2")
        .set_cxx_toolchain_dir(cxx_toolchain_dir)
        .set_project_root("c_programs")
        .add_extra_flags(vec!["-ffp-contract=off", "-Wno-narrowing"]);

    if !target.contains("v7") {
        command = command.add_arch_flags(vec!["faminmax", "lut", "sha3"]);
    }

    /*
     * clang++ cannot link an aarch64_be object file, so we invoke
     * aarch64_be-unknown-linux-gnu's C++ linker. This ensures that we
     * are testing the intrinsics against LLVM.
     *
     * Note: setting `--sysroot=<...>` which is the obvious thing to do
     * does not work as it gets caught up with `#include_next <stdlib.h>`
     * not existing...
     */
    if target.contains("aarch64_be") {
        command = command
            .set_linker(
                cxx_toolchain_dir.unwrap_or("").to_string() + "/bin/aarch64_be-none-linux-gnu-g++",
            )
            .set_include_paths(vec![
                "/include",
                "/aarch64_be-none-linux-gnu/include",
                "/aarch64_be-none-linux-gnu/include/c++/14.2.1",
                "/aarch64_be-none-linux-gnu/include/c++/14.2.1/aarch64_be-none-linux-gnu",
                "/aarch64_be-none-linux-gnu/include/c++/14.2.1/backward",
                "/aarch64_be-none-linux-gnu/libc/usr/include",
            ]);
    }

    if !compiler.contains("clang") {
        command = command.add_extra_flag("-flax-vector-conversions");
    }

    // The shared builder is cloned per intrinsic so each command gets its own
    // input/output name on top of the common configuration.
    let compiler_commands = intrinsics_name_list
        .iter()
        .map(|intrinsic_name| {
            command
                .clone()
                .set_input_name(intrinsic_name)
                .set_output_name(intrinsic_name)
                .to_string()
        })
        .collect::<Vec<_>>();

    compile_c(&compiler_commands)
}

/// Generates the C test program for every intrinsic and optionally compiles
/// them.
///
/// One C source is generated per intrinsic (in parallel) and written to the
/// file name that `create_c_filenames` assigned to it; intrinsics without a
/// mapped file name are skipped.
///
/// Returns `true` when `compiler` is `None` (generation only); otherwise
/// returns the result of `compile_c_arm`.
pub fn build_c(
    intrinsics: &[Intrinsic<ArmIntrinsicType>],
    compiler: Option<&str>,
    target: &str,
    cxx_toolchain_dir: Option<&str>,
) -> bool {
    let intrinsics_name_list = intrinsics
        .par_iter()
        .map(|i| i.name.clone())
        .collect::<Vec<_>>();
    let filename_mapping = create_c_filenames(&intrinsics_name_list);

    intrinsics.par_iter().for_each(|i| {
        let c_code = generate_c_program_arm(&["arm_neon.h", "arm_acle.h", "arm_fp16.h"], i, target);
        if let Some(filename) = filename_mapping.get(&i.name) {
            write_file(filename, c_code);
        }
    });

    // Without a compiler the caller only asked for the sources to be written.
    let Some(compiler) = compiler else {
        return true;
    };
    compile_c_arm(&intrinsics_name_list, compiler, target, cxx_toolchain_dir)
}

/// Generates the Rust test program for every intrinsic and compiles them.
///
/// One Rust source is generated per intrinsic (in parallel) and written to
/// the file name that `create_rust_filenames` assigned to it; intrinsics
/// without a mapped file name are skipped. The result of `compile_rust` is
/// returned.
pub fn build_rust(
    intrinsics: &[Intrinsic<ArmIntrinsicType>],
    toolchain: Option<&str>,
    target: &str,
    linker: Option<&str>,
) -> bool {
    let owned_names: Vec<String> = intrinsics
        .par_iter()
        .map(|intrinsic| intrinsic.name.clone())
        .collect();
    let rust_files = create_rust_filenames(&owned_names);

    intrinsics.par_iter().for_each(|intrinsic| {
        let source = generate_rust_program_arm(intrinsic, target);
        if let Some(path) = rust_files.get(&intrinsic.name) {
            write_file(path, source);
        }
    });

    // `compile_rust` takes borrowed names, so build a second, borrowing list.
    let borrowed_names: Vec<&str> = intrinsics
        .iter()
        .map(|intrinsic| intrinsic.name.as_str())
        .collect();

    compile_rust(&borrowed_names, toolchain, target, linker)
}
89 changes: 89 additions & 0 deletions crates/intrinsic-test/src/arm/intrinsic.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
use crate::base_intrinsictype_trait_def_macro;
use crate::common::argument::ArgumentList;
use crate::common::cli::Language;
use crate::common::indentation::Indentation;
use crate::common::intrinsic::{Intrinsic, IntrinsicDefinition};
use crate::common::intrinsic_helpers::{
BaseIntrinsicTypeDefinition, IntrinsicTypeDefinition, TypeKind,
};

// Expands the crate-level `base_intrinsictype_trait_def_macro!` for
// `ArmIntrinsicType`. NOTE(review): presumably this is what declares the
// `ArmIntrinsicType` type used throughout this module — the macro body lives
// elsewhere, confirm there.
base_intrinsictype_trait_def_macro! {ArmIntrinsicType}

impl IntrinsicDefinition<ArmIntrinsicType> for Intrinsic<ArmIntrinsicType> {
    /// Returns a clone of the intrinsic's argument list.
    fn arguments(&self) -> ArgumentList<ArmIntrinsicType> {
        self.arguments.clone()
    }

    /// Returns a clone of the intrinsic's result type.
    fn results(&self) -> ArmIntrinsicType {
        self.results.clone()
    }

    /// Returns a clone of the intrinsic's name.
    fn name(&self) -> String {
        self.name.clone()
    }

    /// Generates a std::cout for the intrinsics results that will match the
    /// rust debug output format for the return type. The generated line assumes
    /// there is an int i in scope which is the current pass number.
    fn print_result_c(&self, indentation: Indentation, additional: &str) -> String {
        let result = self.results();
        // C++ fragment placed between consecutive lanes / vectors.
        let separator = r#" << ", " << "#;

        let lanes = if result.num_vectors() > 1 {
            // Multi-vector result: print each vector as `type(lane, lane, ...)`.
            (0..result.num_vectors())
                .map(|vector| {
                    let vector_lanes = (0..result.num_lanes())
                        .map(|lane| {
                            format!(
                                "{cast}{lane_fn}(__return_value.val[{vector}], {lane})",
                                cast = result.c_promotion(),
                                lane_fn = result.get_lane_function(),
                            )
                        })
                        .collect::<Vec<_>>()
                        .join(separator);
                    format!(
                        r#""{ty}(" << {vector_lanes} << ")""#,
                        ty = result.c_single_vector_type(),
                    )
                })
                .collect::<Vec<_>>()
                .join(separator)
        } else if result.num_lanes() > 1 {
            // Single vector: print every lane of `__return_value`.
            (0..result.num_lanes())
                .map(|lane| {
                    format!(
                        "{cast}{lane_fn}(__return_value, {lane})",
                        cast = result.c_promotion(),
                        lane_fn = result.get_lane_function(),
                    )
                })
                .collect::<Vec<_>>()
                .join(separator)
        } else {
            // Scalar result: cast to the matching C scalar type.
            let cast = match result.kind() {
                TypeKind::Float if result.inner_size() == 16 => String::from("float16_t"),
                TypeKind::Float if result.inner_size() == 32 => String::from("float"),
                TypeKind::Float if result.inner_size() == 64 => String::from("double"),
                TypeKind::Int => format!("int{}_t", result.inner_size()),
                TypeKind::UInt => format!("uint{}_t", result.inner_size()),
                TypeKind::Poly => format!("poly{}_t", result.inner_size()),
                ty => todo!("print_result_c - Unknown type: {:#?}", ty),
            };
            format!(
                "{promote}cast<{cast}>(__return_value)",
                promote = result.c_promotion(),
            )
        };

        // SIMD results are wrapped as `type(...)`; scalars are printed bare.
        let (ty, close) = if result.is_simd() {
            (format!("{}(", result.c_type()), ")")
        } else {
            (String::new(), "")
        };

        format!(
            r#"{indentation}std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) << {lanes} << "{close}" << std::endl;"#
        )
    }
}
Loading