Skip to content

cmp+Ordering match versus if+else if+then for binary_search_by #86354

Closed
@leonardo-m

Description

@leonardo-m

The binary search is implemented as:

rust/src/libcore/slice.rs

Lines 304 to 324 in 27e766d

fn binary_search_by<F>(&self, mut f: F) -> Result<usize, usize> where
F: FnMut(&T) -> Ordering
{
let mut base = 0usize;
let mut s = self;
loop {
let (head, tail) = s.split_at(s.len() >> 1);
if tail.is_empty() {
return Err(base)
}
match f(&tail[0]) {
Less => {
base += head.len() + 1;
s = &tail[1..];
}
Greater => s = head,
Equal => return Ok(base + head.len()),
}
}
}

Here I've added a caller to instantiate it with commonly used types (u32 and its cmp):

use std::cmp::Ordering::{self, *};

fn binary_search_by<T, F>(this: &[T], mut f: F) -> Result<usize, usize> where
        F: FnMut(&T) -> Ordering
    {
        let mut base = 0usize;
        let mut s = this;

        loop {
            let (head, tail) = s.split_at(s.len() >> 1);
            if tail.is_empty() {
                return Err(base)
            }
            match f(&tail[0]) {
                Less => {
                    base += head.len() + 1;
                    s = &tail[1..];
                }
                Greater => s = head,
                Equal => return Ok(base + head.len()),
            }
        }
}

pub fn foo(data: &[u32], x: u32) -> Result<usize, usize> {
    binary_search_by(data, |y| x.cmp(y))
}

rustc 1.54.0-nightly with optimizations gives:

foo:
	mov     r11, rsi
	shr     r11
	mov     eax, 1
	sub     rsi, r11
	je      .LBB0_8
	mov     r8d, edx
	lea     r10, [rdi + 4*r11]
	xor     edx, edx
	mov     r9, -1
	jmp     .LBB0_4
.LBB0_2:
	add     rdx, r11
	inc     rdx
	add     r10, 4
	dec     rsi
	mov     r11, rsi
	mov     rdi, r10
.LBB0_3:
	mov     rcx, r11
	shr     rcx
	lea     r10, [rdi + 4*rcx]
	sub     r11, rcx
	mov     rsi, r11
	mov     r11, rcx
	je      .LBB0_7
.LBB0_4:
	xor     ecx, ecx
	cmp     dword ptr [r10], r8d
	setne   cl
	cmova   rcx, r9
	cmp     rcx, -1
	je      .LBB0_2
	test    rcx, rcx
	jne     .LBB0_3
	add     rdx, r11
	xor     eax, eax
.LBB0_7:
	ret
.LBB0_8:
	xor     edx, edx
	ret

I've noticed that replacing the call to cmp + match with simpler code, the asm becomes shorter (6 asm instructions less):

fn binary_search_by<T: Ord>(this: &[T], x: T) -> Result<usize, usize> {
        let mut base = 0usize;
        let mut s = this;

        loop {
            let (head, tail) = s.split_at(s.len() >> 1);
            if tail.is_empty() {
                return Err(base)
            }
            if x < tail[0] {
                base += head.len() + 1;
                s = &tail[1 ..];
            } else if x > tail[0] {
                s = head;
            } else {
                return Ok(base + head.len());
            }
        }
}

pub fn foo(data: &[u32], x: u32) -> Result<usize, usize> {
    binary_search_by(data, x)
}

Gives:

foo:
	mov     r10, rsi
	shr     r10
	mov     eax, 1
	sub     rsi, r10
	je      .LBB0_1
	mov     r8d, edx
	lea     r9, [rdi + 4*r10]
	xor     edx, edx
	jmp     .LBB0_3
.LBB0_4:
	add     rdx, r10
	inc     rdx
	add     r9, 4
	dec     rsi
	mov     r10, rsi
	mov     rdi, r9
.LBB0_5:
	mov     rcx, r10
	shr     rcx
	lea     r9, [rdi + 4*rcx]
	sub     r10, rcx
	mov     rsi, r10
	mov     r10, rcx
	je      .LBB0_8
.LBB0_3:
	cmp     dword ptr [r9], r8d
	ja      .LBB0_4
	jb      .LBB0_5
	add     rdx, r10
	xor     eax, eax
.LBB0_8:
	ret
.LBB0_1:
	xor     edx, edx
	ret

Is it possible to improve rustc so it handles simple cmp+Ordering match cases like this about as efficiently as the if/else if/else case?

By the way, I've also noticed that you can rewrite the binary search function using pattern matching, avoiding explicit indexing and slicing (the asm is the same as the stdlib version):

fn binary_search_by<T, F>(mut data: &[T], mut f: F) -> Result<usize, usize> where
        F: FnMut(&T) -> Ordering
    {
        let mut base = 0_usize;

        loop {
            match data.split_at(data.len() >> 1) {
                (_, []) => return Err(base),
                (head, [t0, rest @ ..]) => {
                    match f(t0) {
                        Less => {
                            base += head.len() + 1;
                            data = rest;
                        }
                        Greater => data = head,
                        Equal => return Ok(base + head.len()),
                    }
                },
            }
        }
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-codegenArea: Code generation

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions