Skip to content

Commit

Permalink
enh: 代码结构
Browse files Browse the repository at this point in the history
  • Loading branch information
XiangYyang committed Jul 25, 2023
1 parent 4c18940 commit 2641384
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 76 deletions.
3 changes: 3 additions & 0 deletions src/mm_heap.c
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,9 @@ static void* heap_re_allocate_impl(
*/
static void heap_free_impl(Heap* heap, void* mem)
{
if (mem == NULL) {
return;
}
FreeBlock* block = (FreeBlock*)((uptr_t)(mem) - sizeof(FreeBlock));
// 本内存块大小
heap_size_t origin_sz = block->size;
Expand Down
114 changes: 38 additions & 76 deletions src/mm_pattern.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,10 @@
static mstr_result_t mstr_find_simple(
isize_t*, const char*, usize_t, usize_t, const char*, usize_t
);
static mstr_result_t patt_match_naive(
isize_t*, const mstr_char_t*, usize_t, const mstr_char_t*, usize_t
);
static mstr_result_t patt_match_large(
static mstr_result_t patt_match_impl(
isize_t*, const mstr_char_t*, usize_t, const mstr_char_t*, usize_t
);
static void make_jump_table(uint8_t*, const mstr_char_t*, usize_t);

//
// public:
Expand Down Expand Up @@ -159,54 +157,54 @@ static mstr_result_t mstr_find_simple(
*result = 0;
return MStr_Ok;
}
if (pattern_cnt < BM_THRESHOLD_CNT) {
ret_code = patt_match_naive(
&find_res,
main_str + begin_pos,
main_str_cnt - begin_pos,
pattern,
pattern_cnt
);
}
else {
ret_code = patt_match_large(
&find_res,
main_str + begin_pos,
main_str_cnt - begin_pos,
pattern,
pattern_cnt
);
}
ret_code = patt_match_impl(
&find_res,
main_str + begin_pos,
main_str_cnt - begin_pos,
pattern,
pattern_cnt
);
if (MSTR_SUCC(ret_code)) {
*result = find_res;
}
return ret_code;
}

/**
* @brief 暴力方式的子串匹配
* @brief 子串匹配
*
* @param[out] res: 匹配结果
* @param[in] mstr: 主串
* @param[in] mstr_cnt: 主串长度
* @param[in] patt: 模式串
* @param[in] patt_cnt: 模式串长度
*/
static mstr_result_t patt_match_naive(
static mstr_result_t patt_match_impl(
isize_t* res,
const mstr_char_t* mstr,
usize_t mstr_cnt,
const mstr_char_t* patt,
usize_t patt_cnt
)
{
uint8_t* bad_char_arr = NULL;
usize_t match_cnt = patt_cnt - 1;
const mstr_char_t* m_b = mstr;
const mstr_char_t* m_p = mstr;
const mstr_char_t* m_end = mstr + mstr_cnt;
mstr_assert(patt_cnt > 0);
// 默认为没有找到
*res = -1;
// 分配bad char数组, 并构造跳转表
if (patt_cnt >= BM_THRESHOLD_CNT) {
bad_char_arr = (uint8_t*)mstr_heap_alloc(
sizeof(*bad_char_arr) * BM_CHAR_INDEX_MAX
);
if (bad_char_arr == NULL) {
return MStr_Err_HeapTooSmall;
}
make_jump_table(bad_char_arr, patt, patt_cnt);
}
// 匹配子串
while (m_p < m_end - match_cnt) {
isize_t i = (isize_t)match_cnt;
Expand All @@ -220,52 +218,39 @@ static mstr_result_t patt_match_naive(
break;
}
else {
// 匹配失败, 跳过1个字符的距离
m_p += 1;
// substring的第i个字符匹配失败, m_p + i处失败
// 移动next_pos到匹配失败的字符最后一次在sub
// string中出现的位置处
usize_t arr_idx = (usize_t)(uint8_t)m_p[i];
mstr_assert(arr_idx >= 0 && arr_idx <= BM_CHAR_INDEX_MAX);
m_p += bad_char_arr ? (usize_t)bad_char_arr[arr_idx] : 1;
}
}
if (bad_char_arr != NULL) {
mstr_heap_free(bad_char_arr);
}
return MStr_Ok;
}

/**
* @brief 基于Boyer-Moore实现的字符串匹配
* @brief 按照模式串 patt 构造跳转表
*
* @param[out] res: 匹配结果
* @param[in] mstr: 主串
* @param[in] mstr_len: 主串长度
* @param[out] table: 跳转表
* @param[in] patt: 模式串
* @param[in] patt_cnt: 模式串长度
* @param[in] patt_cnt: 模式串的字符数
*/
static mstr_result_t patt_match_large(
isize_t* res,
const mstr_char_t* mstr,
usize_t mstr_cnt,
const mstr_char_t* patt,
usize_t patt_cnt
static void make_jump_table(
uint8_t* table, const mstr_char_t* patt, usize_t patt_cnt
)
{
uint8_t* bad_char_arr;
usize_t match_cnt = patt_cnt - 1;
const mstr_char_t* m_b = mstr;
const mstr_char_t* m_p = mstr;
const mstr_char_t* m_end = mstr + mstr_cnt;
const usize_t BAD_CHAR_MAX_OFFSET = 255;
mstr_assert(patt_cnt > 0);
// 默认为没有找到
*res = -1;
// 分配bad char数组
bad_char_arr = (uint8_t*)mstr_heap_alloc(
sizeof(*bad_char_arr) * BM_CHAR_INDEX_MAX
);
if (bad_char_arr == NULL) {
return MStr_Err_HeapTooSmall;
}
// 默认情况, 没有出现在sub string的字符移动substring.length长度
for (usize_t i = 0; i < BM_CHAR_INDEX_MAX; i += 1) {
usize_t offset = patt_cnt > BAD_CHAR_MAX_OFFSET ?
BAD_CHAR_MAX_OFFSET :
patt_cnt;
bad_char_arr[i] = (uint8_t)offset;
table[i] = (uint8_t)offset;
}
// 不然, bad_char[ch]就是ch在sub string中最后一次出现的位置
for (usize_t i = 0; i < patt_cnt - 1; i += 1) {
Expand All @@ -274,29 +259,6 @@ static mstr_result_t patt_match_large(
offset > BAD_CHAR_MAX_OFFSET ? BAD_CHAR_MAX_OFFSET : offset;
usize_t ch_idx = (usize_t)(uint8_t)patt[i];
mstr_assert(ch_idx >= 0 && ch_idx <= BM_CHAR_INDEX_MAX);
bad_char_arr[ch_idx] = (uint8_t)li_off;
}
// 匹配子串
while (m_p < m_end - match_cnt) {
isize_t i = (isize_t)match_cnt;
// 倒着来匹配
while (i >= 0 && patt[i] == m_p[i]) {
i -= 1;
}
if (i < 0) {
// 匹配成功
*res = (isize_t)(m_p - m_b);
break;
}
else {
// substring的第i个字符匹配失败, m_p + i处失败
// 移动next_pos到匹配失败的字符最后一次在sub
// string中出现的位置处
usize_t arr_idx = (usize_t)(uint8_t)m_p[i];
mstr_assert(arr_idx >= 0 && arr_idx <= BM_CHAR_INDEX_MAX);
m_p += (usize_t)bad_char_arr[arr_idx];
}
table[ch_idx] = (uint8_t)li_off;
}
mstr_heap_free(bad_char_arr);
return MStr_Ok;
}

0 comments on commit 2641384

Please sign in to comment.