-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdata_extract.cpp
83 lines (74 loc) · 2.26 KB
/
data_extract.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_set>
#include <vector>
using namespace std;
string getValuesForTable(string line);
/**
 * Reads Crops_AllData_Normalized.csv line by line, turns each line into an SQL
 * statement with getValuesForTable(), and writes every distinct statement once
 * to out.txt, followed by a final "commit;".
 *
 * @return 0 on success, 1 when the input or the output file cannot be opened.
 */
int main(){
    std::ifstream file("Crops_AllData_Normalized.csv");
    if (!file)
    {
        std::cerr << "Unable to open Crops_AllData_Normalized.csv" << '\n';
        return 1;
    }

    std::ofstream out("out.txt");
    if (!out)
    {
        std::cerr << "Unable to open out.txt" << '\n';
        return 1;
    }

    // Statements already written; used to drop repeated rows.
    std::unordered_set<std::string> duplicate;
    std::string text;

    // Loop on the read itself: testing eof() before reading runs one extra
    // iteration with an empty string after the last line has been consumed.
    while (std::getline(file, text))
    {
        // A line without any double quote carries no quoted field (blank line,
        // stray "\r"), so there is nothing to extract from it.
        if (text.find('"') == std::string::npos)
        {
            continue;
        }

        std::string line = getValuesForTable(text);

        // insert() reports whether the statement was new, so one lookup is enough.
        if (duplicate.insert(line).second)
        {
            out << line;
        }
    }

    out << "commit;";
    return 0; // both streams are closed by their destructors
}
// Doubles every single quote so the text can be placed inside a '...' SQL string literal.
static std::string escapeSqlLiteral(const std::string& text)
{
    std::string escaped;
    escaped.reserve(text.size());
    for (const char c : text)
    {
        escaped += c;
        if (c == '\'')
        {
            escaped += '\'';
        }
    }
    return escaped;
}

/**
 * Builds one "INSERT INTO ELEMENT VALUES (...)" statement from a CSV line whose
 * fields are wrapped in double quotes.
 *
 * Fields are located purely by the positions of the double-quote characters
 * (the n-th field lies between quote 2n and quote 2n+1, counting from 0).
 * The statement uses, in this order: field 4 (element code), field 2 (product
 * id), field 5 (element name, quoted), field 8 (unit, quoted), field 9 (value)
 * and field 7 (year).
 *
 * @param line one line of the CSV file.
 * @return the statement terminated by ");\n", or an empty string when the line
 *         holds fewer than the 20 double quotes needed to reach field 9.
 */
std::string getValuesForTable(std::string line)
{
    const std::string first = "INSERT INTO ELEMENT VALUES ("; // change as needed
    const std::string last = ");\n";
    // Field 9 is the last one read; it is closed by the 20th quote.
    const std::string::size_type requiredQuotes = 20;

    // Positions of every double quote in the line, in order of appearance.
    std::vector<std::string::size_type> location;
    for (std::string::size_type index = line.find('"');
         index != std::string::npos;
         index = line.find('"', index + 1))
    {
        location.push_back(index);
    }

    // Blank, truncated or otherwise malformed line: indexing `location` below
    // would read past its end, so report "no statement" instead.
    if (location.size() < requiredQuotes)
    {
        return std::string();
    }

    // Text between the opening and closing quote of the n-th field.
    const auto field = [&line, &location](std::string::size_type n) {
        const std::string::size_type open = location[2 * n];
        const std::string::size_type close = location[2 * n + 1];
        return line.substr(open + 1, close - (open + 1));
    };

    // make sure to only grab values needed for your table
    const std::string productID = field(2);
    const std::string elementCode = field(4);
    const std::string ename = field(5);
    const std::string year = field(7);
    const std::string unit = field(8);
    std::string value = field(9);

    // A value that does not parse as a float (empty, non-numeric, or out of
    // range) is stored as SQL NULL.
    try
    {
        std::stof(value);
    }
    catch (const std::invalid_argument&)
    {
        value = "NULL";
    }
    catch (const std::out_of_range&)
    {
        value = "NULL";
    }

    std::string output = first;
    output += elementCode;
    output += ",";
    output += productID;
    output += ",'";
    output += escapeSqlLiteral(ename);
    output += "','";
    output += escapeSqlLiteral(unit);
    output += "',";
    output += value;
    output += ",";
    output += year;
    output += last;
    return output;
}