-
Notifications
You must be signed in to change notification settings - Fork 4
/
parser_dense.h
101 lines (89 loc) · 3.17 KB
/
parser_dense.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#pragma once
#include"config.h"
#include"logger.h"
#include<stdlib.h>
#include<algorithm>
#include<cctype>
#include<cstdio>
#include<functional>
#include<memory>
#include<string>
#include<utility>
#include<vector>
// Non-owning view over a contiguous run of characters: a start pointer plus
// a length. The view never copies or frees the characters it points at, so
// the underlying storage must outlive every StringRef that refers to it.
class StringRef {
public:
    // Builds a view covering `size` characters starting at `begin`.
    StringRef(char const *const begin, int const size)
        : begin_(begin), size_(size) {}

    // Pointer to the first character of the view.
    char const *begin() const { return begin_; }

    // Pointer one past the last character of the view.
    char const *end() const { return begin() + size(); }

    // Number of characters covered by the view.
    int size() const { return size_; }

private:
    char const *begin_;  // first character of the viewed range
    int size_;           // length of the viewed range, in characters
};
// Reads a text file of space-separated numeric fields, one sample per line,
// and hands every parsed line to a user-supplied callback.
//
// All the work happens in the constructor: the file is opened, read line by
// line, and for each line `consume(line_index, fields)` is invoked, where
// `fields` holds (position, value) pairs with position counting from 0 in
// the order the fields appear on the line.
class ParserDense{
private:
    // Capacity, in bytes, of the buffer handed to fgets. A physical line that
    // does not fit is returned by fgets in several pieces, and each piece is
    // delivered to the callback as a separate sample.
    static constexpr int MAX_LINE = 10000000;

    // Callback that receives each parsed line.
    std::function<void(idx_t,std::vector<std::pair<int,value_t>>)> consume;

    // The returned StringRefs point into the argument's storage. The C-string
    // overloads are deleted, presumably so that a literal cannot be silently
    // converted to a temporary std::string whose views would then dangle.
    std::vector<StringRef> split(const char* str, char delimiter = ' ') = delete;
    std::vector<StringRef> split(char* str, char delimiter = ' ') = delete;

    // Splits `str` on `delimiter` and returns views of the non-empty tokens.
    // Consecutive delimiters are collapsed, so no empty token is produced.
    // The views are only valid while `str` is alive and unmodified.
    std::vector<StringRef> split(std::string const &str, char delimiter = ' ') {
        std::vector<StringRef> result;
        char const *const last = str.data() + str.size();
        char const *token_begin = nullptr;  // null while between tokens
        for (char const *p = str.data(); p != last; ++p) {
            if (*p == delimiter) {
                if (token_begin != nullptr) {
                    // A delimiter closes the token that was in progress.
                    result.emplace_back(token_begin, static_cast<int>(p - token_begin));
                    token_begin = nullptr;
                }
            } else if (token_begin == nullptr) {
                // First non-delimiter character opens a new token.
                token_begin = p;
            }
        }
        if (token_begin != nullptr) {
            // The input ended while a token was still open.
            result.emplace_back(token_begin, static_cast<int>(last - token_begin));
        }
        return result;
    }

    // Removes leading whitespace from `s` in place.
    void ltrim(std::string &s) {
        // std::isspace requires a value representable as unsigned char (or
        // EOF); taking the character as unsigned char avoids passing a
        // negative value for bytes >= 0x80.
        s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) {
            return !std::isspace(ch);
        }));
    }

    // Returns a copy of `s` with leading whitespace removed.
    std::string ltrim_copy(std::string s) {
        ltrim(s);
        return s;
    }

public:
    // Opens `path`, parses it line by line and calls `consume` once per line.
    //
    // path:    file to read. If it cannot be opened, an error is logged and
    //          the process exits with status 1.
    // consume: invoked as consume(line_index, fields); line_index starts at 0
    //          and increases by one per line read.
    //
    // Each field is converted with atof, so text that is not a number
    // becomes 0. Fields that are empty after trimming leading whitespace
    // (for example the lone newline left behind by a trailing space) are
    // skipped instead of being reported as an extra 0 value.
    ParserDense(const char* path,std::function<void(idx_t,std::vector<std::pair<int,value_t>>)> consume) : consume(std::move(consume)){
        // The unique_ptr closes the file on every exit path, including an
        // exception thrown by the callback.
        std::unique_ptr<FILE, int (*)(FILE*)> fp(fopen(path,"r"), &fclose);
        if(!fp){
            Logger::log(Logger::ERROR,"File not found at (%s)\n",path);
            exit(1);
        }
        std::unique_ptr<char[]> buff(new char[MAX_LINE]);
        std::string line;          // reused across lines to keep its capacity
        std::string field_buffer;  // reused across fields for the same reason
        idx_t idx = 0;
        while(fgets(buff.get(),MAX_LINE,fp.get())){
            line.assign(buff.get());
            std::vector<std::pair<int,value_t>> vec_sample;
            vec_sample.reserve(200);
            int index = 0;
            for (const auto &it : split(line, ' ')) {
                field_buffer.assign(it.begin(), it.end());
                ltrim(field_buffer);
                if (field_buffer.empty()) {
                    // Whitespace-only token: not a real field.
                    continue;
                }
                vec_sample.emplace_back(index, static_cast<value_t>(atof(field_buffer.c_str())));
                ++index;
            }
            // The constructor parameter shadows the member and has been moved
            // from, so the member must be named explicitly here.
            this->consume(idx, std::move(vec_sample));
            ++idx;
        }
    }
};