Skip to content

macro does not work on arrays, no way to have a manual default implementation #18

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
fschutt opened this issue Dec 14, 2017 · 1 comment

Comments

@fschutt
Copy link
Contributor

fschutt commented Dec 14, 2017

The problem is that I'm trying to vectorize this code:

https://github.com/fschutt/layout2d/blob/master/src/rect.rs#L43-L107

This crate is not helpful - if I use the simd function, the SIMD will also be used in the default function. There is no way to use SIMD in the function, but no SIMD in the default, fallback function.

Second, loading from fields takes a considerable amount of time in SIMD, which is why I have that weird layout with the four-number array. The macro completely fails on arrays: it does not vectorize them at all.

I don't know what the goal is - if you want me to write hand-vectorized code, then you need to provide a way to write a manual fallback function. If you want to do this work "automagically", then the macro should be smarter about arrays.

Currently the code generated with arrays is horrible:

// Macro expansion quoted in the issue. `rotate` forwards every call through the
// function pointer held in the static `PTR`. `PTR` starts out pointing at
// `setup`, which picks one of three variants, stores it in `PTR`, and calls it.
// The three variants (`default`, `with_enable_avx2`, `with_enable_sse4_1`) have
// identical scalar bodies; they differ only in their `#[target_feature]`
// attribute.
//
// Every variant indexes `x[0..=3]` and `y[0..=3]`, so a slice shorter than four
// elements panics on the out-of-bounds index.
pub fn rotate(x: &mut [f32], y: &mut [f32], in_angle: f32) {
    pub extern crate runtime_target_feature_rt as rt;
    // Holds the currently selected implementation; initialised to `setup`.
    static PTR: rt::atomic::Atomic<fn(&mut [f32], &mut [f32], f32)> =
        rt::atomic::Atomic::new(setup);
    // Chooses a variant from the `rt::have_*` checks (AVX2 is tested first,
    // then SSE4.1, otherwise `default`), stores it in `PTR`, then runs it for
    // the current call.
    fn setup(x: &mut [f32], y: &mut [f32], in_angle: f32) {
        let chosen_function = if rt::have_avx2() {
            with_enable_avx2
        } else if rt::have_sse4_1() {
            with_enable_sse4_1
        } else {
            default
        };
        PTR.store(chosen_function, rt::atomic::Ordering::Relaxed);
        chosen_function(x, y, in_angle)
    }
    // Variant without any `#[target_feature]` attribute.
    fn default(x: &mut [f32], y: &mut [f32], in_angle: f32) {
        // Centre is the midpoint between elements 1 and 2 of each slice.
        let center_y = ((y[1] - y[2]) * 0.5) + y[2];
        let center_x = ((x[1] - x[2]) * 0.5) + x[2];
        // Translate all four points so the centre sits at the origin.
        x[0] -= center_x;
        x[1] -= center_x;
        x[2] -= center_x;
        x[3] -= center_x;
        y[0] -= center_y;
        y[1] -= center_y;
        y[2] -= center_y;
        y[3] -= center_y;
        // `in_angle` is converted with `to_radians()` before taking sin/cos.
        let k_angle = in_angle.to_radians();
        let s = k_angle.sin();
        let c = k_angle.cos();
        // 2D rotation: x' = x*c - y*s, y' = x*s + y*c. The results go into
        // temporaries so that each y' is computed from the unrotated x.
        let tl_x = (x[0] * c) - (y[0] * s);
        let tr_x = (x[1] * c) - (y[1] * s);
        let bl_x = (x[2] * c) - (y[2] * s);
        let br_x = (x[3] * c) - (y[3] * s);
        let tl_y = (x[0] * s) + (y[0] * c);
        let tr_y = (x[1] * s) + (y[1] * c);
        let bl_y = (x[2] * s) + (y[2] * c);
        let br_y = (x[3] * s) + (y[3] * c);
        x[0] = tl_x;
        x[1] = tr_x;
        x[2] = bl_x;
        x[3] = br_x;
        y[0] = tl_y;
        y[1] = tr_y;
        y[2] = bl_y;
        y[3] = br_y;
        // Translate back by the centre computed at the top.
        x[0] += center_x;
        x[1] += center_x;
        x[2] += center_x;
        x[3] += center_x;
        y[0] += center_y;
        y[1] += center_y;
        y[2] += center_y;
        y[3] += center_y;
    }
    // Same body as `default`, marked with the "+avx2" target feature.
    #[target_feature = "+avx2"]
    fn with_enable_avx2(x: &mut [f32], y: &mut [f32], in_angle: f32) {
        let center_y = ((y[1] - y[2]) * 0.5) + y[2];
        let center_x = ((x[1] - x[2]) * 0.5) + x[2];
        x[0] -= center_x;
        x[1] -= center_x;
        x[2] -= center_x;
        x[3] -= center_x;
        y[0] -= center_y;
        y[1] -= center_y;
        y[2] -= center_y;
        y[3] -= center_y;
        let k_angle = in_angle.to_radians();
        let s = k_angle.sin();
        let c = k_angle.cos();
        let tl_x = (x[0] * c) - (y[0] * s);
        let tr_x = (x[1] * c) - (y[1] * s);
        let bl_x = (x[2] * c) - (y[2] * s);
        let br_x = (x[3] * c) - (y[3] * s);
        let tl_y = (x[0] * s) + (y[0] * c);
        let tr_y = (x[1] * s) + (y[1] * c);
        let bl_y = (x[2] * s) + (y[2] * c);
        let br_y = (x[3] * s) + (y[3] * c);
        x[0] = tl_x;
        x[1] = tr_x;
        x[2] = bl_x;
        x[3] = br_x;
        y[0] = tl_y;
        y[1] = tr_y;
        y[2] = bl_y;
        y[3] = br_y;
        x[0] += center_x;
        x[1] += center_x;
        x[2] += center_x;
        x[3] += center_x;
        y[0] += center_y;
        y[1] += center_y;
        y[2] += center_y;
        y[3] += center_y;
    }
    // Same body as `default`, marked with the "+sse4.1" target feature.
    #[target_feature = "+sse4.1"]
    fn with_enable_sse4_1(x: &mut [f32], y: &mut [f32], in_angle: f32) {
        let center_y = ((y[1] - y[2]) * 0.5) + y[2];
        let center_x = ((x[1] - x[2]) * 0.5) + x[2];
        x[0] -= center_x;
        x[1] -= center_x;
        x[2] -= center_x;
        x[3] -= center_x;
        y[0] -= center_y;
        y[1] -= center_y;
        y[2] -= center_y;
        y[3] -= center_y;
        let k_angle = in_angle.to_radians();
        let s = k_angle.sin();
        let c = k_angle.cos();
        let tl_x = (x[0] * c) - (y[0] * s);
        let tr_x = (x[1] * c) - (y[1] * s);
        let bl_x = (x[2] * c) - (y[2] * s);
        let br_x = (x[3] * c) - (y[3] * s);
        let tl_y = (x[0] * s) + (y[0] * c);
        let tr_y = (x[1] * s) + (y[1] * c);
        let bl_y = (x[2] * s) + (y[2] * c);
        let br_y = (x[3] * s) + (y[3] * c);
        x[0] = tl_x;
        x[1] = tr_x;
        x[2] = bl_x;
        x[3] = br_x;
        y[0] = tl_y;
        y[1] = tr_y;
        y[2] = bl_y;
        y[3] = br_y;
        x[0] += center_x;
        x[1] += center_x;
        x[2] += center_x;
        x[3] += center_x;
        y[0] += center_y;
        y[1] += center_y;
        y[2] += center_y;
        y[3] += center_y;
    }
    // Call whatever `PTR` currently holds: `setup` until a variant has been
    // stored, the stored variant afterwards.
    PTR.load(rt::atomic::Ordering::Relaxed)(x, y, in_angle)
}

If I, however, use the stdsimd functions, I get those too in the default() function, which completely works against the point of this library:

// Macro expansion quoted in the issue, this time for a hand-written SIMD body.
// `rotate` calls through the function pointer in `PTR`, which starts as `setup`
// and is overwritten by `setup` with the variant it selects. The macro copied
// the same explicit-SIMD body into all three variants, including `default`,
// which carries no `#[target_feature]` attribute.
pub fn rotate(x: &mut [f32], y: &mut [f32], in_angle: f32) {
    pub extern crate runtime_target_feature_rt as rt;
    // Holds the currently selected implementation; initialised to `setup`.
    static PTR: rt::atomic::Atomic<fn(&mut [f32], &mut [f32], f32)> =
        rt::atomic::Atomic::new(setup);
    // Chooses a variant from the `rt::have_*` checks (AVX2 first, then SSE4.1,
    // otherwise `default`), stores it in `PTR`, then runs it for this call.
    fn setup(x: &mut [f32], y: &mut [f32], in_angle: f32) {
        let chosen_function = if rt::have_avx2() {
            with_enable_avx2
        } else if rt::have_sse4_1() {
            with_enable_sse4_1
        } else {
            default
        };
        PTR.store(chosen_function, rt::atomic::Ordering::Relaxed);
        chosen_function(x, y, in_angle)
    }
    // Fallback variant: no `#[target_feature]`, yet it uses the same
    // `simd::f32x4` operations as the feature-enabled variants below.
    fn default(x: &mut [f32], y: &mut [f32], in_angle: f32) {
        // NOTE(review): this imports `stdsimd`, but the paths below are written
        // `simd::...`; reproduced as quoted in the issue.
        use stdsimd;
        // Centre from scalar reads: y uses elements 0 and 2, x uses 1 and 0.
        let center_y = ((y[0] - y[2]) * 0.5) + y[2];
        let center_x = ((x[1] - x[0]) * 0.5) + x[0];
        // Presumably loads four lanes starting at offset 0; confirm against the
        // `f32x4::load` documentation.
        let mut simd_x_dir = simd::f32x4::load(&x, 0);
        let mut simd_y_dir = simd::f32x4::load(&y, 0);
        // Subtract the centre from each vector.
        simd_x_dir = simd_x_dir - simd::f32x4::splat(center_x);
        simd_y_dir = simd_y_dir - simd::f32x4::splat(center_y);
        // `in_angle` is converted with `to_radians()` before taking sin/cos.
        let k_angle = in_angle.to_radians();
        let s = k_angle.sin();
        let c = k_angle.cos();
        // x' = x*c - y*s goes into a new variable so that y' = x*s + y*c below
        // still reads the unrotated `simd_x_dir`.
        let mut simd_x_new =
            (simd_x_dir * simd::f32x4::splat(c)) - (simd_y_dir * simd::f32x4::splat(s));
        simd_y_dir = (simd_x_dir * simd::f32x4::splat(s)) + (simd_y_dir * simd::f32x4::splat(c));
        // Add the centre back, then write the results into `x` and `y`.
        simd_x_new = simd_x_new + simd::f32x4::splat(center_x);
        simd_y_dir = simd_y_dir + simd::f32x4::splat(center_y);
        simd_x_new.store(x, 0);
        simd_y_dir.store(y, 0);
    }
    // Same body as `default`, marked with the "+avx2" target feature.
    #[target_feature = "+avx2"]
    fn with_enable_avx2(x: &mut [f32], y: &mut [f32], in_angle: f32) {
        use stdsimd;
        let center_y = ((y[0] - y[2]) * 0.5) + y[2];
        let center_x = ((x[1] - x[0]) * 0.5) + x[0];
        let mut simd_x_dir = simd::f32x4::load(&x, 0);
        let mut simd_y_dir = simd::f32x4::load(&y, 0);
        simd_x_dir = simd_x_dir - simd::f32x4::splat(center_x);
        simd_y_dir = simd_y_dir - simd::f32x4::splat(center_y);
        let k_angle = in_angle.to_radians();
        let s = k_angle.sin();
        let c = k_angle.cos();
        let mut simd_x_new =
            (simd_x_dir * simd::f32x4::splat(c)) - (simd_y_dir * simd::f32x4::splat(s));
        simd_y_dir = (simd_x_dir * simd::f32x4::splat(s)) + (simd_y_dir * simd::f32x4::splat(c));
        simd_x_new = simd_x_new + simd::f32x4::splat(center_x);
        simd_y_dir = simd_y_dir + simd::f32x4::splat(center_y);
        simd_x_new.store(x, 0);
        simd_y_dir.store(y, 0);
    }
    // Same body as `default`, marked with the "+sse4.1" target feature.
    #[target_feature = "+sse4.1"]
    fn with_enable_sse4_1(x: &mut [f32], y: &mut [f32], in_angle: f32) {
        use stdsimd;
        let center_y = ((y[0] - y[2]) * 0.5) + y[2];
        let center_x = ((x[1] - x[0]) * 0.5) + x[0];
        let mut simd_x_dir = simd::f32x4::load(&x, 0);
        let mut simd_y_dir = simd::f32x4::load(&y, 0);
        simd_x_dir = simd_x_dir - simd::f32x4::splat(center_x);
        simd_y_dir = simd_y_dir - simd::f32x4::splat(center_y);
        let k_angle = in_angle.to_radians();
        let s = k_angle.sin();
        let c = k_angle.cos();
        let mut simd_x_new =
            (simd_x_dir * simd::f32x4::splat(c)) - (simd_y_dir * simd::f32x4::splat(s));
        simd_y_dir = (simd_x_dir * simd::f32x4::splat(s)) + (simd_y_dir * simd::f32x4::splat(c));
        simd_x_new = simd_x_new + simd::f32x4::splat(center_x);
        simd_y_dir = simd_y_dir + simd::f32x4::splat(center_y);
        simd_x_new.store(x, 0);
        simd_y_dir.store(y, 0);
    }
    // Call whatever `PTR` currently holds: `setup` until a variant has been
    // stored, the stored variant afterwards.
    PTR.load(rt::atomic::Ordering::Relaxed)(x, y, in_angle)
}

Now the default function contains SIMD code as well, which is against the point.

So, in practice, this library is currently useless.

@fschutt fschutt changed the title macro does not work on arrays, no way to have a default implementation macro does not work on arrays, no way to have a manual default implementation Dec 14, 2017
@parched
Copy link
Owner

parched commented Dec 21, 2017

Currently it is not very useful for explicit SIMD because of rust-lang/rust#42515. Its current use is with autovectorization. Once that issue is fixed, you should be able to write:

// Usage sketch only: the body holds placeholder comments, not real code.
// The two `cfg` attributes are meant to select between an explicit-SIMD
// implementation (when the "avx2" target feature is enabled) and a fallback
// implementation (when it is not), both written inside the same function.
#[runtime_target_feature("+avx2")]
pub fn rotate(x: &mut [f32], y: &mut [f32], in_angle: f32) {
    #[cfg(target_feature = "avx2")]
    // code with explicit simd

    #[cfg(not(target_feature = "avx2"))]
    // fallback code
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants