Extra level of indirection with explicit self

I found a multi-language benchmark online (https://gist.github.com/1170424) whose Rust version was outdated. I decided to update it.

Here's the original code:

```
// A 2D vector with single-precision components. The code below uses it
// both for gradient directions and for sample/grid-corner positions.
struct Vec2 {
    x: f32,
    y: f32,
}

// Linear interpolation between `a` and `b`: yields `a` when `v` is 0 and
// `b` when `v` is 1. `v` is not clamped.
fn lerp(a: f32, b: f32, v: f32) -> f32 {
    a * (1f32 - v) + b * v
}

// Evaluates the cubic 3v^2 - 2v^3, which maps 0 to 0 and 1 to 1.
// The input is not clamped.
fn smooth(v: f32) -> f32 {
    v * v * (3f32 - 2f32 * v)
}

// Draws one float from `r`, scales it by 2*pi to get an angle, and returns
// the (cos, sin) pair of that angle narrowed to f32.
// NOTE(review): presumably `gen_float` is uniform in [0, 1), making this a
// uniformly random unit vector -- confirm against the rand::Rng docs.
fn random_gradient(r: rand::Rng) -> Vec2 {
    let v = r.gen_float() * float::consts::pi * 2.0;
    Vec2{
        x: float::cos(v) as f32,
        y: float::sin(v) as f32,
    }
}

// Combines the gradient `grad` with the offset of point `p` from the corner
// `orig` and returns a scalar contribution for that corner.
fn gradient(orig: Vec2, grad: Vec2, p: Vec2) -> f32 {
    // Offset from the corner to the sample point.
    let sp = Vec2{x: p.x - orig.x, y: p.y - orig.y};
    // NOTE(review): this reads like a dot product with a typo -- the second
    // `+` is probably meant to be `*` (grad.y * sp.y). Left unchanged on
    // purpose: both listings in this report must stay identical for the
    // timing comparison to be meaningful.
    grad.x * sp.x + grad.y + sp.y
}

// Lookup tables shared by the noise methods. The constructor below fills
// each table with 256 entries.
struct Noise2DContext {
    // Gradient vectors, indexed via `permutations`.
    rgradients: ~[Vec2],
    // Integer table used to turn grid coordinates into gradient indices.
    permutations: ~[int],
}

// Constructor: builds a heap-allocated context holding 256 random gradients
// and the integers 0..255 after an in-place shuffle.
fn Noise2DContext() -> ~Noise2DContext {
    // One generator is used for both the gradients and the shuffle.
    let r = rand::Rng();
    let rgradients = do vec::from_fn(256) |_i| { random_gradient(r) };
    // Starts as the identity sequence 0, 1, ..., 255 and is then shuffled.
    let mut permutations = do vec::from_fn(256) |i| { i as int };
    r.shuffle_mut(permutations);

    // The vectors are moved (not copied) into the returned struct.
    ~Noise2DContext{
        rgradients: move rgradients,
        permutations: move permutations,
    }
}

// Noise evaluation methods. This is the implicit-self variant: none of the
// methods declare a `self` parameter, yet all of them read `self`.
impl Noise2DContext {
    // Returns the gradient stored for integer grid point (x, y).
    fn get_gradient(x: int, y: int) -> Vec2 {
        // Each coordinate is masked with 255 before indexing, and the summed
        // index is masked again, so every lookup stays inside the 256-entry
        // tables built by the constructor.
        let idx = self.permutations[x & 255] + self.permutations[y & 255];
        self.rgradients[idx & 255]
    }

    // Fills `gradients` and `origins` with the data for the four grid
    // corners surrounding (x, y), in the order
    // (x0, y0), (x1, y0), (x0, y1), (x1, y1).
    fn get_gradients(gradients: &[mut Vec2 * 4], origins: &[mut Vec2 * 4], x: f32, y: f32) {
        // Floor is computed through the double-precision routine and the
        // result is narrowed back to f32.
        let x0f = float::floor(x as libc::c_double) as f32;
        let y0f = float::floor(y as libc::c_double) as f32;
        let x0 = x0f as int;
        let y0 = y0f as int;
        let x1 = x0 + 1;
        let y1 = y0 + 1;

        gradients[0] = self.get_gradient(x0, y0);
        gradients[1] = self.get_gradient(x1, y0);
        gradients[2] = self.get_gradient(x0, y1);
        gradients[3] = self.get_gradient(x1, y1);

        // Corner positions in the same order as the gradients above.
        origins[0] = Vec2{x: x0f + 0f32, y: y0f + 0f32};
        origins[1] = Vec2{x: x0f + 1f32, y: y0f + 0f32};
        origins[2] = Vec2{x: x0f + 0f32, y: y0f + 1f32};
        origins[3] = Vec2{x: x0f + 1f32, y: y0f + 1f32};
    }

    // Evaluates the noise function at (x, y): computes one contribution per
    // surrounding grid corner and blends them, first along x, then along y.
    fn get(x: f32, y: f32) -> f32 {
        let p = Vec2{x: x, y: y};
        // Zero-initialised scratch arrays that `get_gradients` overwrites.
        let gradients: [mut Vec2 * 4] = [mut
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
        ];
        let origins: [mut Vec2 * 4] = [mut
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
        ];
        self.get_gradients(&gradients, &origins, x, y);
        let v0 = gradient(origins[0], gradients[0], p);
        let v1 = gradient(origins[1], gradients[1], p);
        let v2 = gradient(origins[2], gradients[2], p);
        let v3 = gradient(origins[3], gradients[3], p);
        // origins[0] is the floored corner, so these differences are the
        // offsets of the sample point within its grid cell.
        let fx = smooth(x - origins[0].x);
        let vx0 = lerp(v0, v1, fx);
        let vx1 = lerp(v2, v3, fx);
        let fy = smooth(y - origins[0].y);
        lerp(vx0, vx1, fy)
    }
}

// Benchmark driver: fills a 256x256 buffer with noise values 100 times,
// then prints the buffer as rows of block-shading characters.
fn main() {
    // NOTE(review): the last glyph is repeated, presumably so that a value
    // of exactly 1.0 (index 5) still has an entry -- confirm intent.
    let symbols = [" ", "░", "▒", "▓", "█", "█"];
    let pixels = vec::to_mut(vec::from_elem(256*256, 0f32));
    let n2d = Noise2DContext();
    // Every pass writes the same values (the context never changes); the
    // repetition is presumably there only to lengthen the run for timing.
    for int::range(0, 100) |_i| {
        for int::range(0, 256) |y| {
            for int::range(0, 256) |x| {
                // Sample on a 0.1-spaced grid, then scale by 0.5 and shift
                // by 0.5 before storing.
                let v = n2d.get(
                    x as f32 * 0.1f32,
                    y as f32 * 0.1f32
                ) * 0.5f32 + 0.5f32;
                pixels[y*256+x] = v;
            };
        };
    };

    for int::range(0, 256) |y| {
        for int::range(0, 256) |x| {
            // NOTE(review): relies on `as` binding more loosely than `/`, so
            // the quotient (not the literal 0.2) is what gets cast to int --
            // confirm for the compiler version used.
            io::print(symbols[pixels[y*256+x] / 0.2f32 as int]);
        }
        io::println("");
    }
}
```

I noticed that updating this code to use explicit self causes a small but measurable perf hit (roughly 2.7% in the timings below), even though @nikomatsakis assures me that the semantics should be the same. For posterity, here's the updated version (the only difference is that the three methods take an explicit `&self` parameter):

```
// A 2D vector with single-precision components. The code below uses it
// both for gradient directions and for sample/grid-corner positions.
struct Vec2 {
    x: f32,
    y: f32,
}

// Linear interpolation between `a` and `b`: yields `a` when `v` is 0 and
// `b` when `v` is 1. `v` is not clamped.
fn lerp(a: f32, b: f32, v: f32) -> f32 {
    a * (1f32 - v) + b * v
}

// Evaluates the cubic 3v^2 - 2v^3, which maps 0 to 0 and 1 to 1.
// The input is not clamped.
fn smooth(v: f32) -> f32 {
    v * v * (3f32 - 2f32 * v)
}

// Draws one float from `r`, scales it by 2*pi to get an angle, and returns
// the (cos, sin) pair of that angle narrowed to f32.
// NOTE(review): presumably `gen_float` is uniform in [0, 1), making this a
// uniformly random unit vector -- confirm against the rand::Rng docs.
fn random_gradient(r: rand::Rng) -> Vec2 {
    let v = r.gen_float() * float::consts::pi * 2.0;
    Vec2{
        x: float::cos(v) as f32,
        y: float::sin(v) as f32,
    }
}

// Combines the gradient `grad` with the offset of point `p` from the corner
// `orig` and returns a scalar contribution for that corner.
fn gradient(orig: Vec2, grad: Vec2, p: Vec2) -> f32 {
    // Offset from the corner to the sample point.
    let sp = Vec2{x: p.x - orig.x, y: p.y - orig.y};
    // NOTE(review): this reads like a dot product with a typo -- the second
    // `+` is probably meant to be `*` (grad.y * sp.y). Left unchanged on
    // purpose: both listings in this report must stay identical for the
    // timing comparison to be meaningful.
    grad.x * sp.x + grad.y + sp.y
}

// Lookup tables shared by the noise methods. The constructor below fills
// each table with 256 entries.
struct Noise2DContext {
    // Gradient vectors, indexed via `permutations`.
    rgradients: ~[Vec2],
    // Integer table used to turn grid coordinates into gradient indices.
    permutations: ~[int],
}

// Constructor: builds a heap-allocated context holding 256 random gradients
// and the integers 0..255 after an in-place shuffle.
fn Noise2DContext() -> ~Noise2DContext {
    // One generator is used for both the gradients and the shuffle.
    let r = rand::Rng();
    let rgradients = do vec::from_fn(256) |_i| { random_gradient(r) };
    // Starts as the identity sequence 0, 1, ..., 255 and is then shuffled.
    let mut permutations = do vec::from_fn(256) |i| { i as int };
    r.shuffle_mut(permutations);

    // The vectors are moved (not copied) into the returned struct.
    ~Noise2DContext{
        rgradients: move rgradients,
        permutations: move permutations,
    }
}

// Noise evaluation methods. This is the explicit-self variant: each method
// declares `&self` as its first parameter.
impl Noise2DContext {
    // Returns the gradient stored for integer grid point (x, y).
    fn get_gradient(&self, x: int, y: int) -> Vec2 {
        // Each coordinate is masked with 255 before indexing, and the summed
        // index is masked again, so every lookup stays inside the 256-entry
        // tables built by the constructor.
        let idx = self.permutations[x & 255] + self.permutations[y & 255];
        self.rgradients[idx & 255]
    }

    // Fills `gradients` and `origins` with the data for the four grid
    // corners surrounding (x, y), in the order
    // (x0, y0), (x1, y0), (x0, y1), (x1, y1).
    fn get_gradients(&self, gradients: &[mut Vec2 * 4], origins: &[mut Vec2 * 4], x: f32, y: f32) {
        // Floor is computed through the double-precision routine and the
        // result is narrowed back to f32.
        let x0f = float::floor(x as libc::c_double) as f32;
        let y0f = float::floor(y as libc::c_double) as f32;
        let x0 = x0f as int;
        let y0 = y0f as int;
        let x1 = x0 + 1;
        let y1 = y0 + 1;

        gradients[0] = self.get_gradient(x0, y0);
        gradients[1] = self.get_gradient(x1, y0);
        gradients[2] = self.get_gradient(x0, y1);
        gradients[3] = self.get_gradient(x1, y1);

        // Corner positions in the same order as the gradients above.
        origins[0] = Vec2{x: x0f + 0f32, y: y0f + 0f32};
        origins[1] = Vec2{x: x0f + 1f32, y: y0f + 0f32};
        origins[2] = Vec2{x: x0f + 0f32, y: y0f + 1f32};
        origins[3] = Vec2{x: x0f + 1f32, y: y0f + 1f32};
    }

    // Evaluates the noise function at (x, y): computes one contribution per
    // surrounding grid corner and blends them, first along x, then along y.
    fn get(&self, x: f32, y: f32) -> f32 {
        let p = Vec2{x: x, y: y};
        // Zero-initialised scratch arrays that `get_gradients` overwrites.
        let gradients: [mut Vec2 * 4] = [mut
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
        ];
        let origins: [mut Vec2 * 4] = [mut
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
        ];
        self.get_gradients(&gradients, &origins, x, y);
        let v0 = gradient(origins[0], gradients[0], p);
        let v1 = gradient(origins[1], gradients[1], p);
        let v2 = gradient(origins[2], gradients[2], p);
        let v3 = gradient(origins[3], gradients[3], p);
        // origins[0] is the floored corner, so these differences are the
        // offsets of the sample point within its grid cell.
        let fx = smooth(x - origins[0].x);
        let vx0 = lerp(v0, v1, fx);
        let vx1 = lerp(v2, v3, fx);
        let fy = smooth(y - origins[0].y);
        lerp(vx0, vx1, fy)
    }
}

// Benchmark driver: fills a 256x256 buffer with noise values 100 times,
// then prints the buffer as rows of block-shading characters.
fn main() {
    // NOTE(review): the last glyph is repeated, presumably so that a value
    // of exactly 1.0 (index 5) still has an entry -- confirm intent.
    let symbols = [" ", "░", "▒", "▓", "█", "█"];
    let pixels = vec::to_mut(vec::from_elem(256*256, 0f32));
    let n2d = Noise2DContext();
    // Every pass writes the same values (the context never changes); the
    // repetition is presumably there only to lengthen the run for timing.
    for int::range(0, 100) |_i| {
        for int::range(0, 256) |y| {
            for int::range(0, 256) |x| {
                // Sample on a 0.1-spaced grid, then scale by 0.5 and shift
                // by 0.5 before storing.
                let v = n2d.get(
                    x as f32 * 0.1f32,
                    y as f32 * 0.1f32
                ) * 0.5f32 + 0.5f32;
                pixels[y*256+x] = v;
            };
        };
    };

    for int::range(0, 256) |y| {
        for int::range(0, 256) |x| {
            // NOTE(review): relies on `as` binding more loosely than `/`, so
            // the quotient (not the literal 0.2) is what gets cast to int --
            // confirm for the compiler version used.
            io::print(symbols[pixels[y*256+x] / 0.2f32 as int]);
        }
        io::println("");
    }
}
```

Both versions were compiled with `rustc --opt-level=3` and profiled as follows:

```
$ (perf stat -r 10 perlin-orig) 2> orig.txt
$ (perf stat -r 10 perlin-expself) 2> expself.txt
```

orig.txt:

```
 Performance counter stats for 'perlin-orig' (10 runs):

    1352.872451  task-clock-msecs         #      0.983 CPUs    ( +-   0.177% )
             97  context-switches         #      0.000 M/sec   ( +-   5.580% )
              1  CPU-migrations           #      0.000 M/sec   ( +-   9.091% )
            738  page-faults              #      0.001 M/sec   ( +-   0.036% )
   209379655582  cycles                   # 154766.738 M/sec   ( +-   5.099% )
   209379655582  instructions             #      1.000 IPC     ( +-   5.099% )
   209379655582  branches                 # 154766.738 M/sec   ( +-   5.099% )
   209379655582  branch-misses            #    100.000 %       ( +-   5.099% )
   209379655582  cache-references         # 154766.738 M/sec   ( +-   5.099% )
   209379655582  cache-misses             # 154766.738 M/sec   ( +-   5.099% )

    1.376399979  seconds time elapsed   ( +-   0.213% )
```

expself.txt:

```
 Performance counter stats for 'perlin-expself' (10 runs):

    1389.736506  task-clock-msecs         #      0.983 CPUs    ( +-   0.096% )
            103  context-switches         #      0.000 M/sec   ( +-   7.792% )
              1  CPU-migrations           #      0.000 M/sec   ( +-  11.111% )
            738  page-faults              #      0.001 M/sec   ( +-   0.049% )
   224626789476  cycles                   # 161632.646 M/sec   ( +-   6.994% )
   224626789476  instructions             #      1.000 IPC     ( +-   6.994% )
   224626789476  branches                 # 161632.646 M/sec   ( +-   6.994% )
   224626789476  branch-misses            #    100.000 %       ( +-   6.994% )
   224626789476  cache-references         # 161632.646 M/sec   ( +-   6.994% )
   224626789476  cache-misses             # 161632.646 M/sec   ( +-   6.994% )

    1.413482586  seconds time elapsed   ( +-   0.240% )
```

@nikomatsakis has a theory:

```
< bstrie> why is explicit self slower than implicit self :(
<@nmatsakis> bstrie: the trans for that is kind of bad... I wonder if it's
             introducing extra indirections or something
<@nmatsakis> the trans of self in general needs to be reworked
```


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Extra level of indirection with explicit self #4402

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Extra level of indirection with explicit self #4402

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions