Closed
Description
The binary search is implemented as:
Lines 304 to 324 in 27e766d
Here I've added a caller to instantiate it with commonly used types (u32 and its cmp):
use std::cmp::Ordering::{self, *};
fn binary_search_by<T, F>(this: &[T], mut f: F) -> Result<usize, usize> where
F: FnMut(&T) -> Ordering
{
let mut base = 0usize;
let mut s = this;
loop {
let (head, tail) = s.split_at(s.len() >> 1);
if tail.is_empty() {
return Err(base)
}
match f(&tail[0]) {
Less => {
base += head.len() + 1;
s = &tail[1..];
}
Greater => s = head,
Equal => return Ok(base + head.len()),
}
}
}
pub fn foo(data: &[u32], x: u32) -> Result<usize, usize> {
binary_search_by(data, |y| x.cmp(y))
}
rustc 1.54.0-nightly with optimizations gives:
foo:
mov r11, rsi
shr r11
mov eax, 1
sub rsi, r11
je .LBB0_8
mov r8d, edx
lea r10, [rdi + 4*r11]
xor edx, edx
mov r9, -1
jmp .LBB0_4
.LBB0_2:
add rdx, r11
inc rdx
add r10, 4
dec rsi
mov r11, rsi
mov rdi, r10
.LBB0_3:
mov rcx, r11
shr rcx
lea r10, [rdi + 4*rcx]
sub r11, rcx
mov rsi, r11
mov r11, rcx
je .LBB0_7
.LBB0_4:
xor ecx, ecx
cmp dword ptr [r10], r8d
setne cl
cmova rcx, r9
cmp rcx, -1
je .LBB0_2
test rcx, rcx
jne .LBB0_3
add rdx, r11
xor eax, eax
.LBB0_7:
ret
.LBB0_8:
xor edx, edx
ret
I've noticed that replacing the call to cmp + match with simpler code, the asm becomes shorter (6 asm instructions less):
fn binary_search_by<T: Ord>(this: &[T], x: T) -> Result<usize, usize> {
let mut base = 0usize;
let mut s = this;
loop {
let (head, tail) = s.split_at(s.len() >> 1);
if tail.is_empty() {
return Err(base)
}
if x < tail[0] {
base += head.len() + 1;
s = &tail[1 ..];
} else if x > tail[0] {
s = head;
} else {
return Ok(base + head.len());
}
}
}
pub fn foo(data: &[u32], x: u32) -> Result<usize, usize> {
binary_search_by(data, x)
}
Gives:
foo:
mov r10, rsi
shr r10
mov eax, 1
sub rsi, r10
je .LBB0_1
mov r8d, edx
lea r9, [rdi + 4*r10]
xor edx, edx
jmp .LBB0_3
.LBB0_4:
add rdx, r10
inc rdx
add r9, 4
dec rsi
mov r10, rsi
mov rdi, r9
.LBB0_5:
mov rcx, r10
shr rcx
lea r9, [rdi + 4*rcx]
sub r10, rcx
mov rsi, r10
mov r10, rcx
je .LBB0_8
.LBB0_3:
cmp dword ptr [r9], r8d
ja .LBB0_4
jb .LBB0_5
add rdx, r10
xor eax, eax
.LBB0_8:
ret
.LBB0_1:
xor edx, edx
ret
Is it possible to improve rustc so it handles simple cmp+Ordering match cases like this about as efficiently as the if/else if/else case?
By the way, I've also noticed that you can rewrite the binary search function using pattern matching, avoiding explicit indexing and slicing (the asm is the same as the stdlib version):
fn binary_search_by<T, F>(mut data: &[T], mut f: F) -> Result<usize, usize> where
F: FnMut(&T) -> Ordering
{
let mut base = 0_usize;
loop {
match data.split_at(data.len() >> 1) {
(_, []) => return Err(base),
(head, [t0, rest @ ..]) => {
match f(t0) {
Less => {
base += head.len() + 1;
data = rest;
}
Greater => data = head,
Equal => return Ok(base + head.len()),
}
},
}
}
}