forked from trmurakami/rppbci
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcoleta_aminer.php
85 lines (72 loc) · 3.41 KB
/
coleta_aminer.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
<!DOCTYPE html>
<?php
/**
 * Batch job: enrich indexed records with AMiner metadata.
 *
 * Searches the index for up to 50 documents that have no "aminer" field yet
 * (sorted by "ano" descending), looks each title up via metrics::get_aminer()
 * and writes the outcome back to the document:
 *
 *   - confident match  -> the AMiner record plus the check date;
 *   - no / weak match  -> only the check date, so the document stops matching
 *                         the "-_exists_:aminer" query and is not re-fetched.
 *
 * $client, $index and $type are not defined here; presumably they come from
 * inc/config.php -- confirm there.
 */
include('inc/config.php');
include('inc/functions.php');

// similar_text() percentage a title (or journal name) must exceed to be accepted.
$similarity_threshold = 90;

$query = [
    "query" => [
        "query_string" => [
            "query" => "-_exists_:aminer",
        ],
    ],
    "sort" => [
        ['ano.keyword' => ['order' => 'desc']],
    ],
];

$params = [
    "index" => $index,
    "type" => $type,
    "size" => 50,
    "body" => $query,
];

$cursor = $client->search($params);

// Elasticsearch < 7 returns hits.total as an integer; 7+ returns an object
// (['value' => n, 'relation' => ...]). Accept both so the counter prints a number.
$total = isset($cursor["hits"]["total"]) ? $cursor["hits"]["total"] : 0;
if (is_array($total)) {
    $total = isset($total["value"]) ? $total["value"] : 0;
}
echo 'Faltam: '.$total.'<br/><br/>';

$hits = (isset($cursor["hits"]["hits"]) && is_array($cursor["hits"]["hits"]))
    ? $cursor["hits"]["hits"]
    : [];

/**
 * Writes the "aminer" field of one document and prints the update response.
 *
 * @param string $id         Document _id.
 * @param array  $aminer_doc AMiner record to store; pass [] to store only the check date.
 */
$store_aminer = function ($id, array $aminer_doc) use ($type) {
    // Stamp every write with the check date (overrides any "date" key in the record).
    $aminer_doc["date"] = date("Ymd");
    $update = [
        "doc" => ["aminer" => $aminer_doc],
        "doc_as_upsert" => true,
    ];
    $result = elasticsearch::elastic_update($id, $type, $update);
    print_r($result);
};

foreach ($hits as $r) {
    $source = (isset($r["_source"]) && is_array($r["_source"])) ? $r["_source"] : [];
    $title = isset($source["titulo"]) ? $source["titulo"] : "";

    if ($title === "Editorial") {
        // Editorials are never looked up; just mark them as checked.
        echo "Editorial";
        $store_aminer($r['_id'], []);
        continue;
    }

    $aminer = metrics::get_aminer($title);
    print_r($title);
    echo "<br/>";

    if (!is_array($aminer) || !isset($aminer["result"]) || !is_array($aminer["result"])) {
        // No usable response (e.g. the lookup failed). Leave the document unmarked
        // so a later run retries it, instead of calling count() on a non-array.
        echo '<br/><br/>';
        continue;
    }

    // Only the first AMiner result is considered.
    $best = (isset($aminer["result"][0]) && is_array($aminer["result"][0]))
        ? $aminer["result"][0]
        : null;

    // Default outcome: nothing trustworthy found -> store only the check date.
    // This also covers an empty result list, which previously stored nothing and
    // left the document to be fetched again on every run.
    $aminer_doc = [];

    if ($best !== null) {
        $best_title = isset($best["title"]) ? $best["title"] : "";
        similar_text($title, $best_title, $percent);
        echo 'Percentual de: '.$percent;

        if ($percent > $similarity_threshold) {
            if (!empty($best["venue"]["name"])) {
                // A venue is available: the journal name must match as well.
                $journal = isset($source["source"]) ? $source["source"] : "";
                similar_text($journal, $best["venue"]["name"], $percent_source);
                echo 'Percentual do Título do periódico de: '.$percent_source;
                if ($percent_source > $similarity_threshold) {
                    $aminer_doc = $best;
                }
            } else {
                // No venue to cross-check: the title match alone is accepted.
                $aminer_doc = $best;
            }
        }
    }

    $store_aminer($r['_id'], $aminer_doc);
    echo '<br/><br/>';
}
?>