Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🚀 add argmin & argmax #42

Merged
merged 5 commits into from
Mar 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions benches/bench_f16_return_nan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,39 +38,111 @@ fn argminmax_rn_f16_random_array_long(c: &mut Criterion) {
c.bench_function("scalar_f16_argminmax_rn", |b| {
b.iter(|| SCALAR::<FloatReturnNaN>::argminmax(black_box(data)))
});
c.bench_function("scalar_f16_argmin_rn", |b| {
b.iter(|| SCALAR::<FloatReturnNaN>::argmin(black_box(data)))
});
c.bench_function("scalar_f16_argmax_rn", |b| {
b.iter(|| SCALAR::<FloatReturnNaN>::argmax(black_box(data)))
});
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("sse4.1") {
c.bench_function("sse_f16_argminmax_rn", |b| {
b.iter(|| unsafe { SSE::<FloatReturnNaN>::argminmax(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("sse4.1") {
c.bench_function("sse_f16_argmin_rn", |b| {
b.iter(|| unsafe { SSE::<FloatReturnNaN>::argmin(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("sse4.1") {
c.bench_function("sse_f16_argmax_rn", |b| {
b.iter(|| unsafe { SSE::<FloatReturnNaN>::argmax(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("avx2") {
c.bench_function("avx2_f16_argminmax_rn", |b| {
b.iter(|| unsafe { AVX2::<FloatReturnNaN>::argminmax(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("avx2") {
c.bench_function("avx2_f16_argmin_rn", |b| {
b.iter(|| unsafe { AVX2::<FloatReturnNaN>::argmin(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("avx2") {
c.bench_function("avx2_f16_argmax_rn", |b| {
b.iter(|| unsafe { AVX2::<FloatReturnNaN>::argmax(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("avx512bw") {
c.bench_function("avx512_f16_argminmax_rn", |b| {
b.iter(|| unsafe { AVX512::<FloatReturnNaN>::argminmax(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("avx512bw") {
c.bench_function("avx512_f16_argmin_rn", |b| {
b.iter(|| unsafe { AVX512::<FloatReturnNaN>::argmin(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("avx512bw") {
c.bench_function("avx512_f16_argmax_rn", |b| {
b.iter(|| unsafe { AVX512::<FloatReturnNaN>::argmax(black_box(data)) })
});
}
#[cfg(target_arch = "arm")]
if std::arch::is_arm_feature_detected!("neon") {
c.bench_function("neon_f16_argminmax_rn", |b| {
b.iter(|| unsafe { NEON::<FloatReturnNaN>::argminmax(black_box(data)) })
});
}
#[cfg(target_arch = "arm")]
if std::arch::is_arm_feature_detected!("neon") {
c.bench_function("neon_f16_argmin_rn", |b| {
b.iter(|| unsafe { NEON::<FloatReturnNaN>::argmin(black_box(data)) })
});
}
#[cfg(target_arch = "arm")]
if std::arch::is_arm_feature_detected!("neon") {
c.bench_function("neon_f16_argmax_rn", |b| {
b.iter(|| unsafe { NEON::<FloatReturnNaN>::argmax(black_box(data)) })
});
}
#[cfg(target_arch = "aarch64")]
if std::arch::is_aarch64_feature_detected!("neon") {
c.bench_function("neon_f16_argminmax_rn", |b| {
b.iter(|| unsafe { NEON::<FloatReturnNaN>::argminmax(black_box(data)) })
});
}
#[cfg(target_arch = "aarch64")]
if std::arch::is_aarch64_feature_detected!("neon") {
c.bench_function("neon_f16_argmin_rn", |b| {
b.iter(|| unsafe { NEON::<FloatReturnNaN>::argmin(black_box(data)) })
});
}
#[cfg(target_arch = "aarch64")]
if std::arch::is_aarch64_feature_detected!("neon") {
c.bench_function("neon_f16_argmax_rn", |b| {
b.iter(|| unsafe { NEON::<FloatReturnNaN>::argmax(black_box(data)) })
});
}
c.bench_function("impl_f16_argminmax_rn", |b| {
b.iter(|| black_box(data.nanargminmax()))
});
c.bench_function("impl_f16_argmin_rn", |b| {
b.iter(|| black_box(data.nanargmin()))
});
c.bench_function("impl_f16_argmax_rn", |b| {
b.iter(|| black_box(data.nanargmax()))
});
}

// Criterion benchmark group `benches`; its only listed target is the f16
// return-NaN benchmark function `argminmax_rn_f16_random_array_long`.
criterion_group!(benches, argminmax_rn_f16_random_array_long,);
Expand Down
72 changes: 72 additions & 0 deletions benches/bench_f32_ignore_nan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,39 +19,111 @@ fn argminmax_in_f32_random_array_long(c: &mut Criterion) {
c.bench_function("scalar_f32_argminmax_in", |b| {
b.iter(|| SCALAR::<FloatIgnoreNaN>::argminmax(black_box(data)))
});
c.bench_function("scalar_f32_argmin_in", |b| {
b.iter(|| SCALAR::<FloatIgnoreNaN>::argmin(black_box(data)))
});
c.bench_function("scalar_f32_argmax_in", |b| {
b.iter(|| SCALAR::<FloatIgnoreNaN>::argmax(black_box(data)))
});
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("sse4.1") {
c.bench_function("sse_f32_argminmax_in", |b| {
b.iter(|| unsafe { SSE::<FloatIgnoreNaN>::argminmax(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("sse4.1") {
c.bench_function("sse_f32_argmin_in", |b| {
b.iter(|| unsafe { SSE::<FloatIgnoreNaN>::argmin(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("sse4.1") {
c.bench_function("sse_f32_argmax_in", |b| {
b.iter(|| unsafe { SSE::<FloatIgnoreNaN>::argmax(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("avx") {
c.bench_function("avx_f32_argminmax_in", |b| {
b.iter(|| unsafe { AVX2::<FloatIgnoreNaN>::argminmax(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("avx") {
c.bench_function("avx_f32_argmin_in", |b| {
b.iter(|| unsafe { AVX2::<FloatIgnoreNaN>::argmin(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("avx") {
c.bench_function("avx_f32_argmax_in", |b| {
b.iter(|| unsafe { AVX2::<FloatIgnoreNaN>::argmax(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("avx512f") {
c.bench_function("avx512_f32_argminmax_in", |b| {
b.iter(|| unsafe { AVX512::<FloatIgnoreNaN>::argminmax(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("avx512f") {
c.bench_function("avx512_f32_argmin_in", |b| {
b.iter(|| unsafe { AVX512::<FloatIgnoreNaN>::argmin(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("avx512f") {
c.bench_function("avx512_f32_argmax_in", |b| {
b.iter(|| unsafe { AVX512::<FloatIgnoreNaN>::argmax(black_box(data)) })
});
}
#[cfg(target_arch = "arm")]
if std::arch::is_arm_feature_detected!("neon") {
c.bench_function("neon_f32_argminmax_in", |b| {
b.iter(|| unsafe { NEON::<FloatIgnoreNaN>::argminmax(black_box(data)) })
});
}
#[cfg(target_arch = "arm")]
if std::arch::is_arm_feature_detected!("neon") {
c.bench_function("neon_f32_argmin_in", |b| {
b.iter(|| unsafe { NEON::<FloatIgnoreNaN>::argmin(black_box(data)) })
});
}
#[cfg(target_arch = "arm")]
if std::arch::is_arm_feature_detected!("neon") {
c.bench_function("neon_f32_argmax_in", |b| {
b.iter(|| unsafe { NEON::<FloatIgnoreNaN>::argmax(black_box(data)) })
});
}
#[cfg(target_arch = "aarch64")]
if std::arch::is_aarch64_feature_detected!("neon") {
c.bench_function("neon_f32_argminmax_in", |b| {
b.iter(|| unsafe { NEON::<FloatIgnoreNaN>::argminmax(black_box(data)) })
});
}
#[cfg(target_arch = "aarch64")]
if std::arch::is_aarch64_feature_detected!("neon") {
c.bench_function("neon_f32_argmin_in", |b| {
b.iter(|| unsafe { NEON::<FloatIgnoreNaN>::argmin(black_box(data)) })
});
}
#[cfg(target_arch = "aarch64")]
if std::arch::is_aarch64_feature_detected!("neon") {
c.bench_function("neon_f32_argmax_in", |b| {
b.iter(|| unsafe { NEON::<FloatIgnoreNaN>::argmax(black_box(data)) })
});
}
c.bench_function("impl_f32_argminmax_in", |b| {
b.iter(|| black_box(data.argminmax()))
});
c.bench_function("impl_f32_argmin_in", |b| {
b.iter(|| black_box(data.argmin()))
});
c.bench_function("impl_f32_argmax_in", |b| {
b.iter(|| black_box(data.argmax()))
});
}

// Criterion benchmark group `benches`; its only listed target is the f32
// ignore-NaN benchmark function `argminmax_in_f32_random_array_long`.
criterion_group!(benches, argminmax_in_f32_random_array_long,);
Expand Down
72 changes: 72 additions & 0 deletions benches/bench_f32_return_nan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,39 +19,111 @@ fn argminmax_rn_f32_random_array_long(c: &mut Criterion) {
c.bench_function("scalar_f32_argminmax_rn", |b| {
b.iter(|| SCALAR::<FloatReturnNaN>::argminmax(black_box(data)))
});
c.bench_function("scalar_f32_argmin_rn", |b| {
b.iter(|| SCALAR::<FloatReturnNaN>::argmin(black_box(data)))
});
c.bench_function("scalar_f32_argmax_rn", |b| {
b.iter(|| SCALAR::<FloatReturnNaN>::argmax(black_box(data)))
});
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("sse4.1") {
c.bench_function("sse_f32_argminmax_rn", |b| {
b.iter(|| unsafe { SSE::<FloatReturnNaN>::argminmax(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("sse4.1") {
c.bench_function("sse_f32_argmin_rn", |b| {
b.iter(|| unsafe { SSE::<FloatReturnNaN>::argmin(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("sse4.1") {
c.bench_function("sse_f32_argmax_rn", |b| {
b.iter(|| unsafe { SSE::<FloatReturnNaN>::argmax(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("avx2") {
c.bench_function("avx2_f32_argminmax_rn", |b| {
b.iter(|| unsafe { AVX2::<FloatReturnNaN>::argminmax(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("avx2") {
c.bench_function("avx2_f32_argmin_rn", |b| {
b.iter(|| unsafe { AVX2::<FloatReturnNaN>::argmin(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("avx2") {
c.bench_function("avx2_f32_argmax_rn", |b| {
b.iter(|| unsafe { AVX2::<FloatReturnNaN>::argmax(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("avx512f") {
c.bench_function("avx512_f32_argminmax_rn", |b| {
b.iter(|| unsafe { AVX512::<FloatReturnNaN>::argminmax(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("avx512f") {
c.bench_function("avx512_f32_argmin_rn", |b| {
b.iter(|| unsafe { AVX512::<FloatReturnNaN>::argmin(black_box(data)) })
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if is_x86_feature_detected!("avx512f") {
c.bench_function("avx512_f32_argmax_rn", |b| {
b.iter(|| unsafe { AVX512::<FloatReturnNaN>::argmax(black_box(data)) })
});
}
#[cfg(target_arch = "arm")]
if std::arch::is_arm_feature_detected!("neon") {
c.bench_function("neon_f32_argminmax_rn", |b| {
b.iter(|| unsafe { NEON::<FloatReturnNaN>::argminmax(black_box(data)) })
});
}
#[cfg(target_arch = "arm")]
if std::arch::is_arm_feature_detected!("neon") {
c.bench_function("neon_f32_argmin_rn", |b| {
b.iter(|| unsafe { NEON::<FloatReturnNaN>::argmin(black_box(data)) })
});
}
#[cfg(target_arch = "arm")]
if std::arch::is_arm_feature_detected!("neon") {
c.bench_function("neon_f32_argmax_rn", |b| {
b.iter(|| unsafe { NEON::<FloatReturnNaN>::argmax(black_box(data)) })
});
}
#[cfg(target_arch = "aarch64")]
if std::arch::is_aarch64_feature_detected!("neon") {
c.bench_function("neon_f32_argminmax_rn", |b| {
b.iter(|| unsafe { NEON::<FloatReturnNaN>::argminmax(black_box(data)) })
});
}
#[cfg(target_arch = "aarch64")]
if std::arch::is_aarch64_feature_detected!("neon") {
c.bench_function("neon_f32_argmin_rn", |b| {
b.iter(|| unsafe { NEON::<FloatReturnNaN>::argmin(black_box(data)) })
});
}
#[cfg(target_arch = "aarch64")]
if std::arch::is_aarch64_feature_detected!("neon") {
c.bench_function("neon_f32_argmax_rn", |b| {
b.iter(|| unsafe { NEON::<FloatReturnNaN>::argmax(black_box(data)) })
});
}
c.bench_function("impl_f32_argminmax_rn", |b| {
b.iter(|| black_box(data.nanargminmax()))
});
c.bench_function("impl_f32_argmin_rn", |b| {
b.iter(|| black_box(data.nanargmin()))
});
c.bench_function("impl_f32_argmax_rn", |b| {
b.iter(|| black_box(data.nanargmax()))
});
}

// Criterion benchmark group `benches`; its only listed target is the f32
// return-NaN benchmark function `argminmax_rn_f32_random_array_long`.
criterion_group!(benches, argminmax_rn_f32_random_array_long,);
Expand Down
Loading