-
Notifications
You must be signed in to change notification settings - Fork 0
/
string_search.cpp
119 lines (111 loc) · 3.88 KB
/
string_search.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
/*
see string_search.h
*/
#include "string_search.h"

#include <optional>
// Constructor.
// Does no work of its own: every argument is passed on unchanged to the
// TaskParallelizer base constructor (see the TaskParallelizer
// constructor/class description for what the arguments mean).
StringSearch::StringSearch(const struct job_details t_jobs[],
                           const unsigned t_job_num,
                           TaskContainer* t_super_job_class,
                           const int t_id)
    : TaskParallelizer(t_jobs, t_job_num, t_super_job_class, t_id)
{
}
// Destructor: this class has nothing of its own to clean up here.
StringSearch::~StringSearch() = default;
// Helper used to copy the prefix/suffix context of a match.
// Copies `length` characters starting at `from` into a freshly allocated,
// NUL-terminated buffer, replacing every tab with the two characters
// '\' 't' and every newline with '\' 'n'.
//
// from:   first character to copy (does not need to be NUL-terminated)
// length: number of characters to read from `from`
// returns a buffer allocated with new[]; this function does not free it.
char* StringSearch::create_escaped_substring
(const char* from, const unsigned length)
{
    // First pass: every tab/newline needs one extra output character.
    unsigned escaped_len = length;
    for (unsigned idx = 0; idx < length; ++idx) {
        const char ch = from[idx];
        if (ch == '\t' || ch == '\n') {
            ++escaped_len;
        }
    }

    char* const escaped = new char[escaped_len + 1];

    // Second pass: copy through a write cursor, expanding escapes on the way.
    char* out = escaped;
    for (unsigned idx = 0; idx < length; ++idx) {
        switch (from[idx]) {
        case '\t':
            *out++ = '\\';
            *out++ = 't';
            break;
        case '\n':
            *out++ = '\\';
            *out++ = 'n';
            break;
        default:
            *out++ = from[idx];
            break;
        }
    }
    *out = '\0';
    return escaped;
}
// main function:
// Scans one segment for every occurrence of the needle string (read from
// m_job_details->job_detail) and hands each hit to the parent job through
// put_sub_result().
//
// Only the window [data + start_padd, data + length - end_padd) is searched:
// PREFIX_LEN bytes are skipped at the front unless the segment has offset 0,
// and SUFIX_LEN bytes are left out at the back unless the segment is flagged
// as last. A hit has to lie completely inside that window. Scanning resumes
// one byte after the start of the previous hit, so overlapping occurrences
// are reported as well.
//
// Every reported result holds the escaped text before and after the hit
// (at most PREFIX_LEN / SUFIX_LEN bytes, clipped to the segment bounds) and
// the hit position, computed as its index inside the segment plus
// t_seg->offset.
//
// t_seg: segment to scan; a null pointer is ignored.
// If building the searcher or searching throws something derived from
// `exception`, the message is written to cerr and scanning of this segment
// stops.
void StringSearch::start(segment* &t_seg)
{
    // nothing to scan without a segment:
    if (t_seg == nullptr) {
        return;
    }

    // job_detail's declared type is not visible in this file; as before it
    // is reinterpreted as a pointer to the needle string (now read-only).
    const string* needle = (const string*) m_job_details->job_detail;
    const unsigned needle_len = needle->length();

    // if we are at beginning or end of file,
    // allow prefix/suffix shorter than desired:
    const unsigned start_padd =
        t_seg->offset == 0 ? 0 : progconst::PREFIX_LEN;
    const unsigned end_padd =
        t_seg->last ? 0 : progconst::SUFIX_LEN;

    // segment too short to hold the needle plus both paddings:
    if (needle->length() + start_padd + end_padd > t_seg->length) {
        return;
    }

    // Build the Boyer-Moore tables once: they depend only on the needle, so
    // rebuilding them for every hit is wasted work. std::optional lets the
    // searcher outlive the try block while construction failures are still
    // reported exactly like search failures.
    std::optional<boyer_moore_searcher<string::const_iterator>> searcher;
    try {
        searcher.emplace(needle->begin(), needle->end());
    } catch (const exception& e) {
        cerr << progconst::search_exception << e.what() << endl;
        return;
    }

    using parent_t = TaskParallelizer<fs::path, segment*, result*, StringSearch>;

    // search for string:
    char* const start = t_seg->data;
    char* const end = start + t_seg->length;
    char* const search_to = end - end_padd;
    char* search_from = start + start_padd;

    while (search_from < search_to) {
        // try to find the next needle occurrence:
        char* position;
        try {
            position = search(search_from, search_to, *searcher);
        } catch (const exception& e) {
            cerr << progconst::search_exception << e.what() << endl;
            return;
        }
        if (position == search_to) {
            // no further occurrence inside the window
            break;
        }

        // context lengths, clipped to what the segment actually contains:
        const unsigned pref_len =
            progconst::PREFIX_LEN < (position - start) ?
            progconst::PREFIX_LEN : (position - start);
        const unsigned sufx_len =
            progconst::SUFIX_LEN < (end - position - needle_len) ?
            progconst::SUFIX_LEN : (end - position - needle_len);

        // create escaped context strings:
        char* prefix = create_escaped_substring
            (position - pref_len, pref_len);
        char* suffix = create_escaped_substring
            (position + needle_len, sufx_len);

        // create and push result object:
        result* res = new result
        {
            prefix, suffix,
            static_cast<unsigned>(position - start) + t_seg->offset
        };
        ((parent_t*) get_super_class())->put_sub_result(res);

        // continue right after the start of this hit (overlaps allowed):
        search_from = position + 1;
    }
}
void StringSearch::start()
{
TaskParallelizer<fs::path, segment*, result*, StringSearch>* super_class =
(TaskParallelizer<fs::path, segment*, result*, StringSearch>*) get_super_class();
segment* next_segm = super_class->next_job_argument();
start(next_segm);
}
// This class does not have any sub-jobs, so there are no sub-job results
// to process: the function is a deliberate no-op.
void StringSearch::process_sub_results()
{
    // intentionally empty
}