From 11e79422aaeebba37864cf8010ad4c9b075c39f2 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 30 Dec 2025 13:23:55 -0700 Subject: [PATCH] optimize md5 --- datafusion/functions/src/crypto/basic.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/datafusion/functions/src/crypto/basic.rs b/datafusion/functions/src/crypto/basic.rs index 5a7d891e0cf1c..bda16684c8b6d 100644 --- a/datafusion/functions/src/crypto/basic.rs +++ b/datafusion/functions/src/crypto/basic.rs @@ -33,7 +33,7 @@ use datafusion_common::{ use datafusion_expr::ColumnarValue; use md5::Md5; use sha2::{Sha224, Sha256, Sha384, Sha512}; -use std::fmt::{self, Write}; +use std::fmt; use std::str::FromStr; use std::sync::Arc; @@ -157,14 +157,18 @@ pub fn md5(args: &[ColumnarValue]) -> Result<ColumnarValue> { }) } -/// this function exists so that we do not need to pull in the crate hex. it is only used by md5 -/// function below +/// Hex encoding lookup table for fast byte-to-hex conversion +const HEX_CHARS_LOWER: &[u8; 16] = b"0123456789abcdef"; + +/// Fast hex encoding using a lookup table instead of format strings. +/// This is significantly faster than using `write!("{:02x}")` for each byte. #[inline] fn hex_encode<T: AsRef<[u8]>>(data: T) -> String { - let mut s = String::with_capacity(data.as_ref().len() * 2); - for b in data.as_ref() { - // Writing to a string never errors, so we can unwrap here. - write!(&mut s, "{b:02x}").unwrap(); + let bytes = data.as_ref(); + let mut s = String::with_capacity(bytes.len() * 2); + for &b in bytes { + s.push(HEX_CHARS_LOWER[(b >> 4) as usize] as char); + s.push(HEX_CHARS_LOWER[(b & 0x0f) as usize] as char); } s }