Skip to content

Commit

Permalink
perf: mutate strings for padding
Browse files Browse the repository at this point in the history
Mutates the strings in place to pad them to the desired size, rather
than constructing new strings.

Also includes some minor updates to the `to_radix_string` algorithm to
improve performance.

This yields across-the-board improvements in the benchmarks:

| bench                             | improvement |
|-----------------------------------|-------------|
| generate cuid                     | ~18%        |
| generate cuid slug                | ~14%        |
| generate many cuids               | ~18%        |
| generate many slugs               | ~17%        |
| multithreaded perf                | ~9%         |
| multithreaded perf (many threads) | ~9%         |
  • Loading branch information
mplanchard committed Mar 26, 2021
1 parent a09bfad commit 3a5cc2e
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 71 deletions.
11 changes: 4 additions & 7 deletions src/fingerprint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@ use std::process;

use hostname;

use crate::BASE;
use crate::error::CuidError;
use crate::text::{pad, to_base_string};
use crate::BASE;

/// Width passed to `pad` for each fingerprint component in this module
/// (the base-encoded process id and the base-encoded hostname value).
static FINGERPRINT_PADDING: usize = 2;

fn pid() -> Result<String, CuidError> {
to_base_string(process::id())
.map(|s| pad(FINGERPRINT_PADDING, &s))
.map(|mut s| pad(FINGERPRINT_PADDING, s))
.map_err(|_| CuidError::FingerprintError("Could not encode pid"))
}

Expand All @@ -20,7 +20,7 @@ fn convert_hostname(hn: &String) -> Result<String, CuidError> {
hn.chars()
.fold(hn.len() + BASE as usize, |acc, c| acc + c as usize) as u64,
)
.map(|base_str| pad(FINGERPRINT_PADDING, &base_str))
.map(|mut base_str| pad(FINGERPRINT_PADDING, base_str))
}

fn host_id() -> Result<String, CuidError> {
Expand Down Expand Up @@ -59,10 +59,7 @@ mod fingerprint_tests {

#[test]
fn test_convert_hostname_3() {
assert_eq!(
"nf",
&*convert_hostname(&"mr-magoo".into()).unwrap()
)
assert_eq!("nf", &*convert_hostname(&"mr-magoo".into()).unwrap())
}

#[test]
Expand Down
11 changes: 2 additions & 9 deletions src/random.rs
Original file line number Diff line number Diff line change
@@ -1,31 +1,25 @@
use rand::{CryptoRng, Rng, thread_rng};
use rand::{thread_rng, CryptoRng, Rng};

use crate::error::CuidError;
use crate::text::{pad, to_base_string};
use crate::{BLOCK_SIZE, DISCRETE_VALUES};


/// Draws a single `f64` from the supplied random number generator.
///
/// The generator is taken by value and must implement both `Rng` and
/// `CryptoRng`.
// NOTE(review): the sample is presumably in [0, 1) per rand's standard
// distribution for floats — confirm against the rand version in use.
fn random_float_from_rng<R: Rng + CryptoRng>(mut rng: R) -> f64 {
    let sample: f64 = rng.gen();
    sample
}


/// Draws a single `f64` using the generator returned by `thread_rng()`.
fn random_float() -> f64 {
    let rng = thread_rng();
    random_float_from_rng(rng)
}


/// Produces a random `u64` by scaling a random float by `max` and
/// truncating the product with an `as u64` cast.
fn random_64_bit_int<N: Into<f64>>(max: N) -> u64 {
    // Sample first, then convert the bound, mirroring the evaluation order
    // of the single-expression form.
    let unit = random_float();
    let upper: f64 = max.into();
    (unit * upper) as u64
}


pub fn random_block() -> Result<String, CuidError> {
to_base_string(random_64_bit_int(DISCRETE_VALUES as u32))
.map(|s| pad(BLOCK_SIZE, s))
to_base_string(random_64_bit_int(DISCRETE_VALUES as u32)).map(|mut s| pad(BLOCK_SIZE, s))
}


#[cfg(test)]
mod test_randoms {
use super::*;
Expand All @@ -40,7 +34,6 @@ mod test_randoms {
fn multiple_blocks_not_equal() {
    // Two independently generated blocks are expected to differ.
    let first = random_block().unwrap();
    let second = random_block().unwrap();
    assert!(first != second)
}

}

#[cfg(nightly)]
Expand Down
88 changes: 33 additions & 55 deletions src/text.rs
Original file line number Diff line number Diff line change
@@ -1,116 +1,96 @@
use std::char;
use std::f64;

use crate::BASE;
use crate::error::CuidError;


/// Returns how many digits are needed to write the integer part of `number`
/// in the given `base`.
///
/// For a regular radix (`base >= 2`) and a finite `number`, the count is
/// found by repeated floor-division rather than with a logarithm. The
/// logarithm `ln(number) / ln(base)` can round to just below an integer at
/// exact powers of the base (e.g. 1000 in base 10), which made the previous
/// `log(..) as u64 + 1` estimate one digit short for such inputs.
///
/// Values below `base` (including zero and negative numbers) yield `1`.
///
/// For degenerate inputs (`base < 2`, or a NaN/infinite `number`) no
/// positional digit count exists; the logarithmic estimate is kept for
/// those, but the final `+ 1` saturates instead of overflowing.
fn digits_in_base<N: Into<f64>>(base: u8, number: N) -> u64 {
    let radix = f64::from(base);
    let mut remaining = number.into();

    if base < 2 || !remaining.is_finite() {
        // The division loop below would never terminate for these inputs.
        return (remaining.log(radix) as u64).saturating_add(1);
    }

    let mut digits = 1;
    while remaining >= radix {
        // Drop the least significant digit; `floor` discards the remainder.
        remaining = (remaining / radix).floor();
        digits += 1;
    }
    digits
}

use crate::BASE;

fn to_radix_string<N: Into<u128>>(radix: u8, number: N) -> Result<String, CuidError> {
let mut number = number.into();
let rad_u32: u32 = radix.into();

if number < radix.into() {
// No need to allocate a vector or do any math
// NOTE: we are okay to cast to u32 here, b/c number < radix,
// which has to be 255 or below.
return char::from_digit(number as u32, radix.into())
return char::from_digit(number as u32, rad_u32)
.map(|c| c.to_string())
.ok_or(CuidError::TextError("Bad digit"))
}
else if number > f64::MAX as u128 {
.ok_or(CuidError::TextError("Bad digit"));
} else if number > f64::MAX as u128 {
return Err(CuidError::TextError("Input number too large"));
}

let mut chars: Vec<char> = Vec::with_capacity(
digits_in_base(radix, number as f64) as usize
);
// 64 chars should almost always be enough to fill without needing to grow
let mut chars: Vec<char> = Vec::with_capacity(64);
while number > 0 {
chars.push(
char::from_digit((number % radix as u128) as u32, radix.into()).unwrap()
);
// We can unwrap here b/c we know that the modulus must be less than the
// radix, which is less than 256
chars.push(char::from_digit((number % radix as u128) as u32, rad_u32).unwrap());
number = number / radix as u128;
}
Ok(chars.iter().rev().collect::<String>())
chars.reverse();
Ok(chars.into_iter().collect())
}


/// Encodes `number` as a string in the crate-wide `BASE` radix.
///
/// Delegates to `to_radix_string` and returns its result unchanged,
/// including any `CuidError` it reports.
pub fn to_base_string<N: Into<u128>>(number: N) -> Result<String, CuidError> {
    let radix = BASE;
    to_radix_string(radix, number)
}


fn pad_with_char<S: AsRef<str>>(pad_char: char, size: usize, to_pad: S) -> String {
let pad_ref = to_pad.as_ref();
let length = pad_ref.len();
fn pad_with_char(pad_char: char, size: usize, mut to_pad: String) -> String {
let length = to_pad.len();
if length == size {
return pad_ref.into();
}
else if length > size {
return pad_ref[length - size..].into();
}
let mut ret = String::with_capacity(size as usize);
return to_pad;
} else if length > size {
// Cut from the start of the string to pad down to the expected size,
// e.g. for a size of 2, `abc` would become `bc`
to_pad.replace_range(0..length - size, "");
return to_pad;
}
let size_diff = size - length;
to_pad.reserve(size_diff);
for _ in 0..(size - length) {
ret.push(pad_char);
to_pad.insert(0, pad_char);
}
ret.push_str(pad_ref);
ret
to_pad
}


pub fn pad<S: AsRef<str>>(size: usize, to_pad: S) -> String {
pad_with_char('0', size, to_pad.as_ref())
pub fn pad(size: usize, mut to_pad: String) -> String {
pad_with_char('0', size, to_pad)
}


#[cfg(test)]
mod pad_tests {
use super::*;

#[test]
fn does_not_pad_str_of_size() {
assert_eq!("foo", &*pad_with_char('a', 3, "foo"))
assert_eq!("foo", &*pad_with_char('a', 3, "foo".into()))
}

#[test]
fn single_char_pad() {
assert_eq!("afoo", &*pad_with_char('a', 4, "foo"))
assert_eq!("afoo", &*pad_with_char('a', 4, "foo".into()))
}

#[test]
fn multichar_pad() {
assert_eq!("aaafoo", &*pad_with_char('a', 6, "foo"))
assert_eq!("aaafoo", &*pad_with_char('a', 6, "foo".into()))
}

#[test]
fn smaller_pad() {
assert_eq!("c", &*pad_with_char('a', 1, "abc"))
assert_eq!("c", &*pad_with_char('a', 1, "abc".into()))
}

#[test]
fn pad_0s() {
assert_eq!("00foo", &*pad(5, "foo"))
assert_eq!("00foo", &*pad(5, "foo".into()))
}

}


#[cfg(test)]
mod radix_str_tests {
use super::*;

#[test]
fn digits_in_base_7() {
    // 1446 is expected to need four digits in base 7.
    let digit_count = digits_in_base(7, 1446);
    assert_eq!(4, digit_count)
}

#[test]
fn digits_in_base_4() {
    // 48 is expected to need three digits in base 4.
    let digit_count = digits_in_base(4, 48);
    assert_eq!(3, digit_count)
}

#[test]
fn hex_number_below_radix() {
assert_eq!("8", &*to_radix_string(16, 8u8).unwrap());
Expand All @@ -135,7 +115,6 @@ mod radix_str_tests {
fn large_base_36() {
    // A multi-digit value encoded in base 36.
    let encoded = to_radix_string(36, 12341234u32).unwrap();
    assert_eq!("7cik2", &*encoded)
}

}

#[cfg(nightly)]
Expand Down Expand Up @@ -193,5 +172,4 @@ mod benchmarks {
pad_with_char('0', 12, "ooo ooo ooo ooo ");
});
}

}

0 comments on commit 3a5cc2e

Please sign in to comment.