macro does not work on arrays, no way to have a manual default implementation #18

fschutt · 2017-12-14T16:38:55Z

The problem is that I'm trying to vectorize this code:

https://github.com/fschutt/layout2d/blob/master/src/rect.rs#L43-L107

This crate is not helpful here: if I use SIMD functions in the annotated function, the same SIMD code also ends up in the default function. There is no way to use SIMD in the feature-specific versions while keeping the default (fallback) function free of SIMD.

Second, loading from fields takes a considerable amount of time in SIMD, which is why I have that weird layout with the four-number array. The macro completely fails on arrays: it does not vectorize them at all.

I don't know what the goal is - if you want me to write hand-vectorized code, then you need to provide a way to write a manual fallback function. If you want to do this work "automagically", then the macro should be smarter about arrays.

Currently the code generated with arrays is horrible:

/// Runtime-dispatching `rotate`: rotates the four points stored in `x`/`y`
/// in place by `in_angle` (degrees, converted with `to_radians`) around the
/// midpoint of elements 1 and 2.
///
/// Every call goes through the function pointer held in `PTR`. The pointer
/// starts out as `setup`, which selects one of the three variants below,
/// stores it in `PTR`, and forwards the call.
///
/// # Panics
///
/// Indexes elements `0..=3` of both slices, so it panics if either slice has
/// fewer than four elements.
pub fn rotate(x: &mut [f32], y: &mut [f32], in_angle: f32) {
    // Support crate aliased as `rt`; it supplies the atomic cell and the
    // `have_*` checks used below.
    pub extern crate runtime_target_feature_rt as rt;
    // Currently selected implementation; initialised to `setup`.
    static PTR: rt::atomic::Atomic<fn(&mut [f32], &mut [f32], f32)> =
        rt::atomic::Atomic::new(setup);
    // Initial target of `PTR`: choose a variant, cache it, then run it for
    // this call.
    fn setup(x: &mut [f32], y: &mut [f32], in_angle: f32) {
        // AVX2 is tried first, then SSE4.1, then the plain fallback.
        // NOTE(review): `rt::have_*` are presumably runtime CPU feature
        // checks; their definitions are not visible here.
        let chosen_function = if rt::have_avx2() {
            with_enable_avx2
        } else if rt::have_sse4_1() {
            with_enable_sse4_1
        } else {
            default
        };
        PTR.store(chosen_function, rt::atomic::Ordering::Relaxed);
        chosen_function(x, y, in_angle)
    }
    // Fallback variant: carries no `target_feature` attribute.
    fn default(x: &mut [f32], y: &mut [f32], in_angle: f32) {
        // Rotation center: midpoint of elements 1 and 2 on each axis.
        let center_y = ((y[1] - y[2]) * 0.5) + y[2];
        let center_x = ((x[1] - x[2]) * 0.5) + x[2];
        // Translate all four points so the center sits at the origin.
        x[0] -= center_x;
        x[1] -= center_x;
        x[2] -= center_x;
        x[3] -= center_x;
        y[0] -= center_y;
        y[1] -= center_y;
        y[2] -= center_y;
        y[3] -= center_y;
        let k_angle = in_angle.to_radians();
        let s = k_angle.sin();
        let c = k_angle.cos();
        // 2D rotation: x' = x*c - y*s, y' = x*s + y*c. Results go into
        // temporaries because both formulas read the unrotated x and y.
        let tl_x = (x[0] * c) - (y[0] * s);
        let tr_x = (x[1] * c) - (y[1] * s);
        let bl_x = (x[2] * c) - (y[2] * s);
        let br_x = (x[3] * c) - (y[3] * s);
        let tl_y = (x[0] * s) + (y[0] * c);
        let tr_y = (x[1] * s) + (y[1] * c);
        let bl_y = (x[2] * s) + (y[2] * c);
        let br_y = (x[3] * s) + (y[3] * c);
        x[0] = tl_x;
        x[1] = tr_x;
        x[2] = bl_x;
        x[3] = br_x;
        y[0] = tl_y;
        y[1] = tr_y;
        y[2] = bl_y;
        y[3] = br_y;
        // Translate back to the original center.
        x[0] += center_x;
        x[1] += center_x;
        x[2] += center_x;
        x[3] += center_x;
        y[0] += center_y;
        y[1] += center_y;
        y[2] += center_y;
        y[3] += center_y;
    }
    // Same body as `default`; only the `+avx2` target feature attribute
    // differs.
    #[target_feature = "+avx2"]
    fn with_enable_avx2(x: &mut [f32], y: &mut [f32], in_angle: f32) {
        // Rotation center: midpoint of elements 1 and 2 on each axis.
        let center_y = ((y[1] - y[2]) * 0.5) + y[2];
        let center_x = ((x[1] - x[2]) * 0.5) + x[2];
        // Translate all four points so the center sits at the origin.
        x[0] -= center_x;
        x[1] -= center_x;
        x[2] -= center_x;
        x[3] -= center_x;
        y[0] -= center_y;
        y[1] -= center_y;
        y[2] -= center_y;
        y[3] -= center_y;
        let k_angle = in_angle.to_radians();
        let s = k_angle.sin();
        let c = k_angle.cos();
        // 2D rotation into temporaries (both formulas read the old x and y).
        let tl_x = (x[0] * c) - (y[0] * s);
        let tr_x = (x[1] * c) - (y[1] * s);
        let bl_x = (x[2] * c) - (y[2] * s);
        let br_x = (x[3] * c) - (y[3] * s);
        let tl_y = (x[0] * s) + (y[0] * c);
        let tr_y = (x[1] * s) + (y[1] * c);
        let bl_y = (x[2] * s) + (y[2] * c);
        let br_y = (x[3] * s) + (y[3] * c);
        x[0] = tl_x;
        x[1] = tr_x;
        x[2] = bl_x;
        x[3] = br_x;
        y[0] = tl_y;
        y[1] = tr_y;
        y[2] = bl_y;
        y[3] = br_y;
        // Translate back to the original center.
        x[0] += center_x;
        x[1] += center_x;
        x[2] += center_x;
        x[3] += center_x;
        y[0] += center_y;
        y[1] += center_y;
        y[2] += center_y;
        y[3] += center_y;
    }
    // Same body as `default`; only the `+sse4.1` target feature attribute
    // differs.
    #[target_feature = "+sse4.1"]
    fn with_enable_sse4_1(x: &mut [f32], y: &mut [f32], in_angle: f32) {
        // Rotation center: midpoint of elements 1 and 2 on each axis.
        let center_y = ((y[1] - y[2]) * 0.5) + y[2];
        let center_x = ((x[1] - x[2]) * 0.5) + x[2];
        // Translate all four points so the center sits at the origin.
        x[0] -= center_x;
        x[1] -= center_x;
        x[2] -= center_x;
        x[3] -= center_x;
        y[0] -= center_y;
        y[1] -= center_y;
        y[2] -= center_y;
        y[3] -= center_y;
        let k_angle = in_angle.to_radians();
        let s = k_angle.sin();
        let c = k_angle.cos();
        // 2D rotation into temporaries (both formulas read the old x and y).
        let tl_x = (x[0] * c) - (y[0] * s);
        let tr_x = (x[1] * c) - (y[1] * s);
        let bl_x = (x[2] * c) - (y[2] * s);
        let br_x = (x[3] * c) - (y[3] * s);
        let tl_y = (x[0] * s) + (y[0] * c);
        let tr_y = (x[1] * s) + (y[1] * c);
        let bl_y = (x[2] * s) + (y[2] * c);
        let br_y = (x[3] * s) + (y[3] * c);
        x[0] = tl_x;
        x[1] = tr_x;
        x[2] = bl_x;
        x[3] = br_x;
        y[0] = tl_y;
        y[1] = tr_y;
        y[2] = bl_y;
        y[3] = br_y;
        // Translate back to the original center.
        x[0] += center_x;
        x[1] += center_x;
        x[2] += center_x;
        x[3] += center_x;
        y[0] += center_y;
        y[1] += center_y;
        y[2] += center_y;
        y[3] += center_y;
    }
    // Load whatever `PTR` currently holds (`setup` or the chosen variant)
    // and call it with the caller's arguments.
    PTR.load(rt::atomic::Ordering::Relaxed)(x, y, in_angle)
}

If I, however, use the stdsimd functions, I get those too in the default() function, which completely works against the point of this library:

/// Runtime-dispatching `rotate` whose body uses explicit `f32x4` operations:
/// rotates the values in `x`/`y` in place by `in_angle` (degrees, converted
/// with `to_radians`) around a center computed from `x[0]`, `x[1]`, `y[0]`
/// and `y[2]`.
///
/// Every call goes through the function pointer held in `PTR`. The pointer
/// starts out as `setup`, which selects one of the three variants below,
/// stores it in `PTR`, and forwards the call. All three variants, including
/// `default`, contain the same explicit-SIMD body.
pub fn rotate(x: &mut [f32], y: &mut [f32], in_angle: f32) {
    // Support crate aliased as `rt`; it supplies the atomic cell and the
    // `have_*` checks used below.
    pub extern crate runtime_target_feature_rt as rt;
    // Currently selected implementation; initialised to `setup`.
    static PTR: rt::atomic::Atomic<fn(&mut [f32], &mut [f32], f32)> =
        rt::atomic::Atomic::new(setup);
    // Initial target of `PTR`: choose a variant, cache it, then run it for
    // this call.
    fn setup(x: &mut [f32], y: &mut [f32], in_angle: f32) {
        // AVX2 is tried first, then SSE4.1, then the plain fallback.
        // NOTE(review): `rt::have_*` are presumably runtime CPU feature
        // checks; their definitions are not visible here.
        let chosen_function = if rt::have_avx2() {
            with_enable_avx2
        } else if rt::have_sse4_1() {
            with_enable_sse4_1
        } else {
            default
        };
        PTR.store(chosen_function, rt::atomic::Ordering::Relaxed);
        chosen_function(x, y, in_angle)
    }
    // Fallback variant: no `target_feature` attribute, yet the body still
    // uses the `f32x4` operations.
    fn default(x: &mut [f32], y: &mut [f32], in_angle: f32) {
        // NOTE(review): imports `stdsimd` but the paths below start with
        // `simd::`; how `simd` resolves is not visible here — confirm.
        use stdsimd;
        // Center: midpoint of y[0] and y[2], and of x[0] and x[1].
        let center_y = ((y[0] - y[2]) * 0.5) + y[2];
        let center_x = ((x[1] - x[0]) * 0.5) + x[0];
        // Load from each slice at offset 0 (presumably four consecutive
        // lanes — confirm against the `f32x4::load` API).
        let mut simd_x_dir = simd::f32x4::load(&x, 0);
        let mut simd_y_dir = simd::f32x4::load(&y, 0);
        // Translate so the center sits at the origin.
        simd_x_dir = simd_x_dir - simd::f32x4::splat(center_x);
        simd_y_dir = simd_y_dir - simd::f32x4::splat(center_y);
        let k_angle = in_angle.to_radians();
        let s = k_angle.sin();
        let c = k_angle.cos();
        // x' = x*c - y*s goes into a new variable because the unrotated
        // `simd_x_dir` is still read by the y' formula on the next line.
        let mut simd_x_new =
            (simd_x_dir * simd::f32x4::splat(c)) - (simd_y_dir * simd::f32x4::splat(s));
        // y' = x*s + y*c; `simd_y_dir` is overwritten with the rotated y.
        simd_y_dir = (simd_x_dir * simd::f32x4::splat(s)) + (simd_y_dir * simd::f32x4::splat(c));
        // Translate back to the original center.
        simd_x_new = simd_x_new + simd::f32x4::splat(center_x);
        simd_y_dir = simd_y_dir + simd::f32x4::splat(center_y);
        // Write the results back into the caller's slices at offset 0.
        simd_x_new.store(x, 0);
        simd_y_dir.store(y, 0);
    }
    // Same body as `default`; only the `+avx2` target feature attribute
    // differs.
    #[target_feature = "+avx2"]
    fn with_enable_avx2(x: &mut [f32], y: &mut [f32], in_angle: f32) {
        // NOTE(review): imports `stdsimd` but the paths below start with
        // `simd::`; how `simd` resolves is not visible here — confirm.
        use stdsimd;
        // Center: midpoint of y[0] and y[2], and of x[0] and x[1].
        let center_y = ((y[0] - y[2]) * 0.5) + y[2];
        let center_x = ((x[1] - x[0]) * 0.5) + x[0];
        let mut simd_x_dir = simd::f32x4::load(&x, 0);
        let mut simd_y_dir = simd::f32x4::load(&y, 0);
        // Translate so the center sits at the origin.
        simd_x_dir = simd_x_dir - simd::f32x4::splat(center_x);
        simd_y_dir = simd_y_dir - simd::f32x4::splat(center_y);
        let k_angle = in_angle.to_radians();
        let s = k_angle.sin();
        let c = k_angle.cos();
        // x' needs its own variable: `simd_x_dir` is read again for y'.
        let mut simd_x_new =
            (simd_x_dir * simd::f32x4::splat(c)) - (simd_y_dir * simd::f32x4::splat(s));
        simd_y_dir = (simd_x_dir * simd::f32x4::splat(s)) + (simd_y_dir * simd::f32x4::splat(c));
        // Translate back to the original center.
        simd_x_new = simd_x_new + simd::f32x4::splat(center_x);
        simd_y_dir = simd_y_dir + simd::f32x4::splat(center_y);
        simd_x_new.store(x, 0);
        simd_y_dir.store(y, 0);
    }
    // Same body as `default`; only the `+sse4.1` target feature attribute
    // differs.
    #[target_feature = "+sse4.1"]
    fn with_enable_sse4_1(x: &mut [f32], y: &mut [f32], in_angle: f32) {
        // NOTE(review): imports `stdsimd` but the paths below start with
        // `simd::`; how `simd` resolves is not visible here — confirm.
        use stdsimd;
        // Center: midpoint of y[0] and y[2], and of x[0] and x[1].
        let center_y = ((y[0] - y[2]) * 0.5) + y[2];
        let center_x = ((x[1] - x[0]) * 0.5) + x[0];
        let mut simd_x_dir = simd::f32x4::load(&x, 0);
        let mut simd_y_dir = simd::f32x4::load(&y, 0);
        // Translate so the center sits at the origin.
        simd_x_dir = simd_x_dir - simd::f32x4::splat(center_x);
        simd_y_dir = simd_y_dir - simd::f32x4::splat(center_y);
        let k_angle = in_angle.to_radians();
        let s = k_angle.sin();
        let c = k_angle.cos();
        // x' needs its own variable: `simd_x_dir` is read again for y'.
        let mut simd_x_new =
            (simd_x_dir * simd::f32x4::splat(c)) - (simd_y_dir * simd::f32x4::splat(s));
        simd_y_dir = (simd_x_dir * simd::f32x4::splat(s)) + (simd_y_dir * simd::f32x4::splat(c));
        // Translate back to the original center.
        simd_x_new = simd_x_new + simd::f32x4::splat(center_x);
        simd_y_dir = simd_y_dir + simd::f32x4::splat(center_y);
        simd_x_new.store(x, 0);
        simd_y_dir.store(y, 0);
    }
    // Load whatever `PTR` currently holds (`setup` or the chosen variant)
    // and call it with the caller's arguments.
    PTR.load(rt::atomic::Ordering::Relaxed)(x, y, in_angle)
}

Now the default function contains SIMD code as well, which is against the point.

So, in practice, this library is currently useless.

The text was updated successfully, but these errors were encountered:

parched · 2017-12-21T09:02:47Z

Currently it is not very useful for explicit SIMD because of rust-lang/rust#42515. Its current use is with autovectorization. Once that issue is fixed you should be able to write something like this:

// Usage sketch, not compilable as written: the two `cfg`-gated placeholders
// stand for the explicit-SIMD body and the fallback body respectively.
// NOTE(review): presumably each generated variant would keep only the branch
// matching its enabled target features — confirm against the macro.
#[runtime_target_feature("+avx2")]
pub fn rotate(x: &mut [f32], y: &mut [f32], in_angle: f32) {
    #[cfg(target_feature = "avx2")]
    // code with explicit simd

    #[cfg(not(target_feature = "avx2"))]
    // fallback code
}

fschutt changed the title ~~macro does not work on arrays, no way to have a default implementation~~ macro does not work on arrays, no way to have a manual default implementation Dec 14, 2017

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

macro does not work on arrays, no way to have a manual default implementation #18

macro does not work on arrays, no way to have a manual default implementation #18

fschutt commented Dec 14, 2017

parched commented Dec 21, 2017

Uh oh!

macro does not work on arrays, no way to have a manual default implementation #18

macro does not work on arrays, no way to have a manual default implementation #18

Comments

fschutt commented Dec 14, 2017

parched commented Dec 21, 2017

Uh oh!