-
Notifications
You must be signed in to change notification settings - Fork 8
/
memmem.c
102 lines (88 loc) · 3.44 KB
/
memmem.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/*
* ibex - pseudo-library
*
* Copyright (c) 2015 xerub
* Boyer-Moore-Horspool algorithm from Wikipedia
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "plib.h"
#define UCHAR_MAX 255
unsigned char *
boyermoore_horspool_memmem(const unsigned char* haystack, size_t hlen,
const unsigned char* needle, size_t nlen)
{
size_t last, scan = 0;
size_t bad_char_skip[UCHAR_MAX + 1]; /* Officially called:
* bad character shift */
/* Sanity checks on the parameters */
if (nlen <= 0 || !haystack || !needle)
return NULL;
/* ---- Preprocess ---- */
/* Initialize the table to default value */
/* When a character is encountered that does not occur
* in the needle, we can safely skip ahead for the whole
* length of the needle.
*/
for (scan = 0; scan <= UCHAR_MAX; scan = scan + 1)
bad_char_skip[scan] = nlen;
/* C arrays have the first byte at [0], therefore:
* [nlen - 1] is the last byte of the array. */
last = nlen - 1;
/* Then populate it with the analysis of the needle */
for (scan = 0; scan < last; scan = scan + 1)
bad_char_skip[needle[scan]] = last - scan;
/* ---- Do the matching ---- */
/* Search the haystack, while the needle can still be within it. */
while (hlen >= nlen)
{
/* scan from the end of the needle */
for (scan = last; haystack[scan] == needle[scan]; scan = scan - 1)
if (scan == 0) /* If the first byte matches, we've found it. */
return (void *)haystack;
/* otherwise, we need to skip some bytes and start again.
Note that here we are getting the skip value based on the last byte
of needle, no matter where we didn't match. So if needle is: "abcd"
then we are skipping based on 'd' and that value will be 4, and
for "abcdd" we again skip on 'd' but the value will be only 1.
The alternative of pretending that the mismatched character was
the last character is slower in the normal case (E.g. finding
"abcd" in "...azcd..." gives 4 by using 'd' but only
4-2==2 using 'z'. */
hlen -= bad_char_skip[haystack[last]];
haystack += bad_char_skip[haystack[last]];
}
return NULL;
}
void *
memmem(const void *haystack, size_t hlen, const void *needle, size_t nlen)
{
const unsigned char *h;
const unsigned char *n;
if (!nlen) {
return (void *)haystack;
}
if (nlen > hlen) {
return NULL;
}
if (nlen >= 4 && hlen >= 256) {
return boyermoore_horspool_memmem(haystack, hlen, needle, nlen);
}
for (h = haystack, n = needle; hlen >= nlen; hlen--, h++) {
if (*h == *n && !memcmp(h + 1, n + 1, nlen - 1)) {
return (char *)h;
}
}
return NULL;
}