Skip to content

Commit

Permalink
support upper and lower letters.
Browse files Browse the repository at this point in the history
  • Loading branch information
smartmx committed Apr 6, 2022
1 parent fcffc8b commit a614040
Show file tree
Hide file tree
Showing 5 changed files with 174 additions and 10 deletions.
6 changes: 5 additions & 1 deletion examples/hash_match_demo.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ void hash_match_test1func(void *t)
{
rt_kprintf("test1\n");
}
const uint8_t hash_match_test1key[] = {251, 35, 0, 189, 76, 32, 232, 16, 168, 192};
const uint8_t hash_match_test1key[] = {'a', 'b', 'c', 'd', 'e', 'f', 232, 'g', 168, 192};
/* test upper and lower compare. */
const uint8_t hash_match_test1keyu[] = {'A', 'B', 'C', 'd', 'e', 'f', 232, 'g', 168, 192};
HASH_MATCH_EXPORT(hash_match_test, hash_match_test1, hash_match_test1key, sizeof(hash_match_test1key), hash_match_test1func, "this is test 1");

/* different section group with same key, for test. */
Expand Down Expand Up @@ -70,6 +72,8 @@ static void hash_match_test_task(void *arg)
HASH_MATCH(hash_match_test, hash_match_test1key, sizeof(hash_match_test1key), NULL);
HASH_MATCH(hash_match_test, hash_match_test2key, sizeof(hash_match_test2key), NULL);
HASH_MATCH(hash_match_test, hash_match_test3key, sizeof(hash_match_test3key), NULL);
/* test upper and lower compare. */
HASH_MATCH(hash_match_test, hash_match_test1keyu, sizeof(hash_match_test1keyu), NULL);
/* mix up the length, to try. */
HASH_MATCH(hash_match_test, hash_match_test1key, sizeof(hash_match_test2key), NULL);
HASH_MATCH(hash_match_test, hash_match_test2key, sizeof(hash_match_test3key), NULL);
Expand Down
4 changes: 2 additions & 2 deletions hash-match.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@
#define hash_match_memcmp memcmp
#define HASH_MATCH_MEMCMP_SAME 0
#else
#define hash_match_memcmp
#define HASH_MATCH_MEMCMP_SAME
#define hash_match_memcmp murmurhash3_lower_char_upper_memcmp
#define HASH_MATCH_MEMCMP_SAME 0
#endif

typedef void (*hash_match_handler)(void *);
Expand Down
116 changes: 116 additions & 0 deletions murmurhash3.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@

/*
 * Rotate the 32-bit value x left by y bits.
 *
 * Every use of an argument is parenthesized so that compound expressions
 * (e.g. ROTL32(a | b, n - 1)) expand with the intended precedence, and x is
 * converted to uint32_t so both shifts operate on an unsigned 32-bit value.
 *
 * y is expected to be in the range 1..31 (a shift by 32 bits is undefined).
 * x and y are each evaluated twice: do not pass arguments with side effects.
 */
#define ROTL32(x, y) (((uint32_t)(x) << (y)) | ((uint32_t)(x) >> (32 - (y))))

/**
* calculate hash_code.
*
* @param hash_key the hash_key start address.
* @param len the length of the hash_key.
*
* @return type uint32_t, the result of calculated value.
*/
uint32_t murmurhash3_caculate32(const void *hash_key, uint32_t len)
{
const uint8_t *hash_key_ptr = (const uint8_t *)hash_key;
Expand Down Expand Up @@ -71,3 +79,111 @@ uint32_t murmurhash3_caculate32(const void *hash_key, uint32_t len)

return hash;
}

/**
 * convert one lower-case ASCII letter to its upper-case form.
 *
 * @param c the char value.
 *
 * @return type uint8_t, the upper-case letter when c is in 'a'..'z',
 *         otherwise c unchanged.
 */
static uint8_t murmurhash3_lower_char_upper(uint8_t c)
{
    if ((c >= 'a') && (c <= 'z'))
    {
        return (uint8_t)(c - ('a' - 'A'));
    }
    return c;
}

/**
 * compare two buffers byte by byte, treating the ASCII letters 'a'..'z'
 * as equal to their upper-case forms. the buffers are only read.
 *
 * @param src1 the start address of the first buffer.
 * @param src2 the start address of the second buffer.
 * @param len the number of bytes to compare.
 *
 * @return type uint8_t, 0 when all len bytes match (also when len is 0),
 *         1 when any byte differs.
 */
uint8_t murmurhash3_lower_char_upper_memcmp(const void *src1, const void *src2, uint32_t len)
{
    const uint8_t *src1_char = (const uint8_t *)src1;
    const uint8_t *src2_char = (const uint8_t *)src2;
    uint32_t i;

    for (i = 0; i < len; i++)
    {
        if (murmurhash3_lower_char_upper(src1_char[i]) != murmurhash3_lower_char_upper(src2_char[i]))
        {
            /*
             * return a fixed non-zero value instead of the mismatch offset:
             * an offset of 0 (or any multiple of 256 once truncated to
             * uint8_t) would be indistinguishable from the "same" result 0.
             */
            return 1;
        }
    }
    return 0;
}

/**
 * upper all lower letters in hash key and calculate the hash_code.
 * the key is only read: every byte is passed through
 * murmurhash3_lower_char_upper() before it is mixed into the hash.
 *
 * @param hash_key the hash_key start address.
 * @param len the length of the hash_key.
 *
 * @return type uint32_t, the result of calculated value.
 */
uint32_t murmurhash3_upper_caculate32(const void *hash_key, uint32_t len)
{
    const uint8_t *hash_key_ptr = (const uint8_t *)hash_key;
    uint32_t nblocks = len / 4;
    uint32_t hash = MURMURHASH3_SEED_VALUE;
    uint32_t data;

    /* body: consume the key four bytes at a time */
    while (nblocks > 0)
    {
        /*
         * get 32bit data, first byte in the most significant position.
         * each byte is widened to uint32_t before shifting: shifting the
         * promoted int value of a byte >= 0x80 left by 24 would overflow
         * a signed int, which is undefined behaviour.
         */
        data = ((uint32_t)murmurhash3_lower_char_upper(hash_key_ptr[0]) << 24) |
               ((uint32_t)murmurhash3_lower_char_upper(hash_key_ptr[1]) << 16) |
               ((uint32_t)murmurhash3_lower_char_upper(hash_key_ptr[2]) << 8) |
               ((uint32_t)murmurhash3_lower_char_upper(hash_key_ptr[3]));

        data *= MURMURHASH3_C1_VALUE;
        data = ROTL32(data, MURMURHASH3_R1_VALUE);
        data *= MURMURHASH3_C2_VALUE;

        hash ^= data;
        hash = ROTL32(hash, MURMURHASH3_R2_VALUE);
        hash = hash * MURMURHASH3_M_VALUE + MURMURHASH3_N_VALUE;

        hash_key_ptr += 4;
        nblocks--;
    }

    /* tail: mix in the remaining 1..3 bytes, if any */
    data = 0;
    switch (len & 3)
    {
    case 3:
        data ^= ((uint32_t)murmurhash3_lower_char_upper(hash_key_ptr[2]) << 16);/* @suppress("No break at end of case") */
        /* fall through */
    case 2:
        data ^= ((uint32_t)murmurhash3_lower_char_upper(hash_key_ptr[1]) << 8); /* @suppress("No break at end of case") */
        /* fall through */
    case 1:
        data ^= (uint32_t)murmurhash3_lower_char_upper(hash_key_ptr[0]);
        data *= MURMURHASH3_C1_VALUE;
        data = ROTL32(data, MURMURHASH3_R1_VALUE);
        data *= MURMURHASH3_C2_VALUE;
        hash ^= data;
        break;
    default:
        /* len is a multiple of 4: no tail bytes to mix in */
        break;
    }

    /* finalization */

    hash ^= len;

    /* Finalization mix - force all bits of a hash block to avalanche */
    hash ^= hash >> 16;
    hash *= 0x85ebca6b;
    hash ^= hash >> 13;
    hash *= 0xc2b2ae35;
    hash ^= hash >> 16;

    return hash;
}
4 changes: 4 additions & 0 deletions murmurhash3.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,8 @@

extern uint32_t murmurhash3_caculate32(const void *hash_key, uint32_t len);

extern uint32_t murmurhash3_upper_caculate32(const void *hash_key, uint32_t len);

extern uint8_t murmurhash3_lower_char_upper_memcmp(const void *src1, const void *src2, uint32_t len);

#endif /* _MURMURHASH3_H_ */
54 changes: 47 additions & 7 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

但是一旦工程量变大,运行速度下降明显。比如有200个数据数组等待比较,每次都要比较很多次。

经常可以看到有的工程里`switch`里面有上百个`case strcmp():`,极度影响代码运行效率。
经常可以看到有的工程里`if(memcmp()) {}`后边有上百个`else if(memcmp()) {}`,极度影响代码运行效率。

此时,将key的数组特征化,计算出特有的hash值,来代替比较,就可以给运行速度带来很大的提升。

Expand Down Expand Up @@ -98,7 +98,7 @@ PROVIDE(hash_match_test_end = .);
. = ALIGN(4);
```

例如:
例如,对于提供的例程,可以将ld文件修改如下:

```ld
.text :
Expand All @@ -121,6 +121,12 @@ PROVIDE(hash_match_test_end = .);
PROVIDE(hash_match_test_end = .);
. = ALIGN(4);
/* this is for GROUP hash_match_test1 of hash_match library. */
PROVIDE(hash_match_test1_start = .);
KEEP(*(hash_match_test))
PROVIDE(hash_match_test1_end = .);
. = ALIGN(4);
} >FLASH AT>FLASH
```

Expand All @@ -131,10 +137,6 @@ PROVIDE(hash_match_test_end = .);
配置选项可以在`hash-match.h`中修改

```c
/*
* @Note: hash-match use murmur3 hash algorithm in default: https://github.com/aappleby/smhasher.
* you can use your own hash algorithm by change the definition "hash_match_caculate".
*/
#define hash_match_caculate murmurhash3_caculate32

/* whether save description of hash source or not, set 0 will not save description. */
Expand Down Expand Up @@ -170,10 +172,48 @@ PROVIDE(hash_match_test_end = .);

`HASH_MATCH_PRINTF`宏定义是在有报错信息时候调用的打印函数。

`HASH_MATCH_COMPARE_KEY`宏定义决定了在哈希值匹配成功后,是否继续匹配数据内容。一般来说,长度一样的数据内容,哈希值一样的概率小的微乎其微,所以可以根据安全性要求来决定是否继续匹配数据内容。
`HASH_MATCH_COMPARE_KEY`宏定义决定了在哈希值匹配成功后,是否继续匹配数据内容。一般来说,长度一样的数据内容,哈希值一样的概率很小,所以可以根据安全性要求来决定是否继续匹配数据内容。

`HASH_MATCH_USE_STRING_H`宏定义决定了匹配数据内容使用的函数,如果不准备使用,可以改为其他函数。

## 大小写通用匹配

`hash-match.c`本身并不处理大小写通用(不区分大小写)的匹配,需要在hash计算函数以及key比较函数中进行大小写转换处理。

目前在`murmurhash3.c`中提供了转换大小写的处理函数,如需使用,需要把配置项改为如下:

```c
#define hash_match_caculate murmurhash3_upper_caculate32

/* whether save description of hash source or not, set 0 will not save description. */
#define HASH_MATCH_SAVE_DESC 0

/* set HASH_MATCH_INIT_CHECK to 1 will check all hash values in one group during init a group, report if some hash value is duplicated. */
#define HASH_MATCH_INIT_CHECK 0

/* change to your own printf function, or don't use it. */
#define HASH_MATCH_PRINTF printf

/* whether compare key when hash_code is same. */
#define HASH_MATCH_COMPARE_KEY 1

/* use string.h or self functions to compare key. */
#define HASH_MATCH_USE_STRING_H 0

#if HASH_MATCH_USE_STRING_H
#include "string.h"
#define hash_match_memcmp memcmp
#define HASH_MATCH_MEMCMP_SAME 0
#else
#define hash_match_memcmp murmurhash3_lower_char_upper_memcmp
#define HASH_MATCH_MEMCMP_SAME 0
#endif
```

`murmurhash3_upper_caculate32`函数会先将key中的小写字母转换为大写字母,再进行哈希值计算,通过`hash_match_caculate`宏定义启用。

`murmurhash3_lower_char_upper_memcmp`函数会先将小写字母转换为大写字母,再逐字节比较两段数据是否相同,通过`hash_match_memcmp`宏定义启用。

## 注意事项

虽然头文件中提供了函数,但是一般不直接使用那些函数,因为需要根据不同编译器有不同的配置,所以此时需要使用宏定义的函数来对不同编译器产生适用性。
Expand Down

0 comments on commit a614040

Please sign in to comment.