support upper and lower letters.

smartmx · Apr 6, 2022 · a614040 · a614040
1 parent fcffc8b
commit a614040
Show file tree

Hide file tree

Showing 5 changed files with 174 additions and 10 deletions.
diff --git a/examples/hash_match_demo.c b/examples/hash_match_demo.c
@@ -16,7 +16,9 @@ void hash_match_test1func(void *t)
 {
     rt_kprintf("test1\n");
 }
-const uint8_t hash_match_test1key[] = {251, 35, 0, 189, 76, 32, 232, 16, 168, 192};
+const uint8_t hash_match_test1key[] = {'a', 'b', 'c', 'd', 'e', 'f', 232, 'g', 168, 192};
+/* same key as hash_match_test1key but with 'a', 'b', 'c' in upper case, to test case-insensitive matching. */
+const uint8_t hash_match_test1keyu[] = {'A', 'B', 'C', 'd', 'e', 'f', 232, 'g', 168, 192};
 HASH_MATCH_EXPORT(hash_match_test, hash_match_test1, hash_match_test1key, sizeof(hash_match_test1key), hash_match_test1func, "this is test 1");
 
 /* different section group with same key, for test. */
@@ -70,6 +72,8 @@ static void hash_match_test_task(void *arg)
         HASH_MATCH(hash_match_test, hash_match_test1key, sizeof(hash_match_test1key), NULL);
         HASH_MATCH(hash_match_test, hash_match_test2key, sizeof(hash_match_test2key), NULL);
         HASH_MATCH(hash_match_test, hash_match_test3key, sizeof(hash_match_test3key), NULL);
+        /* look up the upper-case variant of key 1 to test case-insensitive matching. */
+        HASH_MATCH(hash_match_test, hash_match_test1keyu, sizeof(hash_match_test1keyu), NULL);
         /* mix up the length, to try. */
         HASH_MATCH(hash_match_test, hash_match_test1key, sizeof(hash_match_test2key), NULL);
         HASH_MATCH(hash_match_test, hash_match_test2key, sizeof(hash_match_test3key), NULL);

diff --git a/hash-match.h b/hash-match.h
@@ -41,8 +41,8 @@
     #define hash_match_memcmp       memcmp
     #define HASH_MATCH_MEMCMP_SAME  0
 #else
-    #define hash_match_memcmp
-    #define HASH_MATCH_MEMCMP_SAME
+    #define hash_match_memcmp       murmurhash3_lower_char_upper_memcmp
+    #define HASH_MATCH_MEMCMP_SAME  0
 #endif
 
 typedef void (*hash_match_handler)(void *);

diff --git a/murmurhash3.c b/murmurhash3.c
@@ -18,6 +18,14 @@
 
 #define ROTL32(x,y) ((x << y) | (x >> (32 - y)))
 
+/**
+ * calculate hash_code.
+ *
+ * @param hash_key the hash_key start address.
+ * @param len the length of the hash_key.
+ *
+ * @return type uint32_t, the result of calculated value.
+ */
 uint32_t murmurhash3_caculate32(const void *hash_key, uint32_t len)
 {
     const uint8_t *hash_key_ptr = (const uint8_t *)hash_key;
@@ -71,3 +79,111 @@ uint32_t murmurhash3_caculate32(const void *hash_key, uint32_t len)
 
     return hash;
 }
+
+/**
+ * convert one ASCII lower-case letter to its upper-case counterpart.
+ *
+ * @param c the byte value to convert.
+ *
+ * @return type uint8_t, the upper-case letter when c is in 'a'..'z', otherwise c unchanged.
+ */
+static uint8_t murmurhash3_lower_char_upper(uint8_t c)
+{
+    /* only 'a'..'z' are folded, every other byte value passes through untouched. */
+    const int is_lower_letter = !((c < 'a') || (c > 'z'));
+
+    return is_lower_letter ? (uint8_t)(c - ('a' - 'A')) : c;
+}
+
+/**
+ * compare two buffers byte by byte, treating ASCII lower-case and upper-case letters as equal.
+ *
+ * @param src1 the start address of the first buffer.
+ * @param src2 the start address of the second buffer.
+ * @param len the number of bytes to compare.
+ *
+ * @return type uint8_t, 0 when all len bytes match (or len is 0), 1 when any byte differs.
+ */
+uint8_t murmurhash3_lower_char_upper_memcmp(const void *src1, const void *src2, uint32_t len)
+{
+    const uint8_t *src1_char = (const uint8_t *)src1;
+    const uint8_t *src2_char = (const uint8_t *)src2;
+    uint32_t i;
+
+    for (i = 0; i < len; i++)
+    {
+        if (murmurhash3_lower_char_upper(src1_char[i]) != murmurhash3_lower_char_upper(src2_char[i]))
+        {
+            /*
+             * report a fixed non-zero value: returning the mismatch offset would
+             * give 0 ("same") for a difference in the very first byte, and the
+             * uint8_t return type would also wrap to 0 at every offset that is
+             * a multiple of 256.
+             */
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/**
+ * calculate the hash_code of a key with every ASCII lower-case letter ('a'..'z')
+ * converted to upper-case first, so keys that differ only in letter case
+ * produce the same value.
+ *
+ * @param hash_key the hash_key start address.
+ * @param len the length of the hash_key.
+ *
+ * @return type uint32_t, the result of calculated value.
+ */
+uint32_t murmurhash3_upper_caculate32(const void *hash_key, uint32_t len)
+{
+    const uint8_t *hash_key_ptr = (const uint8_t *)hash_key;
+    uint32_t nblocks = len / 4;
+    uint32_t hash = MURMURHASH3_SEED_VALUE;
+    uint32_t data;
+
+    /* body */
+    while (nblocks > 0)
+    {
+        /*
+         * get 32bit data, first byte in the most significant position.
+         * each byte is widened to uint32_t before shifting: a uint8_t would be
+         * promoted to signed int, and shifting a value >= 0x80 left by 24
+         * overflows int, which is undefined behavior.
+         */
+        data = ((uint32_t)murmurhash3_lower_char_upper(hash_key_ptr[0]) << 24) | \
+               ((uint32_t)murmurhash3_lower_char_upper(hash_key_ptr[1]) << 16) | \
+               ((uint32_t)murmurhash3_lower_char_upper(hash_key_ptr[2]) << 8) | \
+               ((uint32_t)murmurhash3_lower_char_upper(hash_key_ptr[3]));
+
+        data *= MURMURHASH3_C1_VALUE;
+        data = ROTL32(data, MURMURHASH3_R1_VALUE);
+        data *= MURMURHASH3_C2_VALUE;
+
+        hash ^= data;
+        hash = ROTL32(hash, MURMURHASH3_R2_VALUE);
+        hash = hash * MURMURHASH3_M_VALUE + MURMURHASH3_N_VALUE;
+
+        hash_key_ptr += 4;
+        nblocks--;
+    }
+    /* tail: mix in the remaining 1..3 bytes, the cases fall through on purpose. */
+    data = 0;
+    switch (len & 3)
+    {
+    case 3:
+        data ^= ((uint32_t)murmurhash3_lower_char_upper(hash_key_ptr[2]) << 16);/* @suppress("No break at end of case") */
+        /* fall through */
+    case 2:
+        data ^= ((uint32_t)murmurhash3_lower_char_upper(hash_key_ptr[1]) << 8); /* @suppress("No break at end of case") */
+        /* fall through */
+    case 1:
+        data ^= (uint32_t)murmurhash3_lower_char_upper(hash_key_ptr[0]);
+        data *= MURMURHASH3_C1_VALUE;
+        data = ROTL32(data, MURMURHASH3_R1_VALUE);
+        data *= MURMURHASH3_C2_VALUE;
+        hash ^= data;
+        break;
+    default:
+        /* len is a multiple of 4, there is no tail to mix in. */
+        break;
+    }
+
+    /* finalization */
+
+    hash ^= len;
+
+    /* Finalization mix - force all bits of a hash block to avalanche */
+    hash ^= hash >> 16;
+    hash *= 0x85ebca6b;
+    hash ^= hash >> 13;
+    hash *= 0xc2b2ae35;
+    hash ^= hash >> 16;
+
+    return hash;
+}
diff --git a/murmurhash3.h b/murmurhash3.h
@@ -33,4 +33,8 @@
 
 extern uint32_t murmurhash3_caculate32(const void *hash_key, uint32_t len);
 
+/* calculate the hash_code of hash_key with lower-case letters 'a'..'z' converted to upper-case first. */
+extern uint32_t murmurhash3_upper_caculate32(const void *hash_key, uint32_t len);
+
+/* compare len bytes of src1 and src2 with lower-case letters 'a'..'z' treated as upper-case, returns 0 when they match. */
+extern uint8_t murmurhash3_lower_char_upper_memcmp(const void *src1, const void *src2, uint32_t len);
+
 #endif /* _MURMURHASH3_H_ */
diff --git a/readme.md b/readme.md
@@ -10,7 +10,7 @@
 
 但是一旦工程量变大，运行速度下降明显。比如有200个数据数组等待比较，每次都要比较很多次。
 
-经常可以看到有的工程里`switch`里面有上百个`case strcmp():`，极度影响代码运行效率。
+经常可以看到有的工程里`if(memcmp()) {}`后边有上百个`else if(memcmp()) {}`，极度影响代码运行效率。
 
 此时，将key的数组特征化，计算出特有的hash值，来代替比较，就可以给运行速度带来很大的提升。
 
@@ -98,7 +98,7 @@ PROVIDE(hash_match_test_end = .);
 . = ALIGN(4);
 ```
 
-例如：
+例如，提供的例程中就可以将ld文件修改如下：
 
 ```ld
 .text :
@@ -121,6 +121,12 @@ PROVIDE(hash_match_test_end = .);
     PROVIDE(hash_match_test_end = .);
     . = ALIGN(4);
 
+    /* this is for GROUP hash_match_test1 of hash_match library. */
+    PROVIDE(hash_match_test1_start = .);
+    KEEP(*(hash_match_test1))
+    PROVIDE(hash_match_test1_end = .);
+    . = ALIGN(4);
+
 } >FLASH AT>FLASH 
 ```
 
@@ -131,10 +137,6 @@ PROVIDE(hash_match_test_end = .);
 配置选项可以在`hash-match.h`中修改
 
 ```c
-/*
- * @Note: hash-match use murmur3 hash algorithm in default: https://github.com/aappleby/smhasher.
- *        you can use your own hash algorithm by change the definition "hash_match_caculate".
- */
 #define hash_match_caculate         murmurhash3_caculate32
 
 /* whether save description of hash source or not, set 0 will not save description. */
@@ -170,10 +172,48 @@ PROVIDE(hash_match_test_end = .);
 
 `HASH_MATCH_PRINTF`宏定义是在有报错信息时候调用的打印函数。
 
-`HASH_MATCH_COMPARE_KEY`宏定义决定了在哈希值匹配成功后，是否继续匹配数据内容。一般来说，长度一样的数据内容，哈希值一样的概率小的微乎其微，所以可以根据安全性要求来决定是否继续匹配数据内容。
+`HASH_MATCH_COMPARE_KEY`宏定义决定了在哈希值匹配成功后，是否继续匹配数据内容。一般来说，长度一样的数据内容，哈希值一样的概率很小，所以可以根据安全性要求来决定是否继续匹配数据内容。
 
 `HASH_MATCH_USE_STRING_H`宏定义决定了匹配数据内容使用的函数，如果不准备使用，可以改为其他函数。
 
+## 大小写通用匹配
+
+`hash-match.c`本身并不处理大小写不敏感的匹配，需要在哈希计算函数和数据比较函数中处理。
+
+目前在`murmurhash3.c`中提供了转换大小写的处理函数，如需使用，需要把配置项改为如下：
+
+```c
+#define hash_match_caculate         murmurhash3_upper_caculate32
+
+/* whether save description of hash source or not, set 0 will not save description. */
+#define HASH_MATCH_SAVE_DESC        0
+
+/* set HASH_MATCH_INIT_CHECK to 1 will check all hash values in one group during init a group, report if some hash value is duplicated. */
+#define HASH_MATCH_INIT_CHECK       0
+
+/* change to your own printf function, or don't use it. */
+#define HASH_MATCH_PRINTF           printf
+
+/* whether compare key when hash_code is same. */
+#define HASH_MATCH_COMPARE_KEY      1
+
+/* use string.h or self functions to compare key. */
+#define HASH_MATCH_USE_STRING_H     0
+
+#if HASH_MATCH_USE_STRING_H
+    #include "string.h"
+    #define hash_match_memcmp       memcmp
+    #define HASH_MATCH_MEMCMP_SAME  0
+#else
+    #define hash_match_memcmp       murmurhash3_lower_char_upper_memcmp
+    #define HASH_MATCH_MEMCMP_SAME  0
+#endif
+```
+
+`murmurhash3_upper_caculate32`函数会将小写字母（a–z）转换为大写字母后再进行哈希值计算。
+
+`murmurhash3_lower_char_upper_memcmp`函数会将小写字母（a–z）转换为大写字母后再比较是否相同。
+
 ## 注意事项
 
 虽然头文件中提供了函数，但是一般不直接使用那些函数，因为需要根据不同编译器有不同的配置，所以此时需要使用宏定义的函数来对不同编译器产生适用性。