-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse_tweets.php
142 lines (125 loc) · 4.59 KB
/
parse_tweets.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
<?php
include 'ElasticConnectorForTweetStore.php';
// Script entry point: build the parser once, then keep draining the cache
// forever. Pacing between cycles comes from parseFromCache() itself, which
// sleeps before it returns.
$parseTweets = new ParseTweets();

// `new` always evaluates to an object (a constructor cannot make it yield
// null), so this guard passes on every run; it is kept as written.
if (null != $parseTweets) {
    for (;;) {
        $parseTweets->parseFromCache();
    }
}
/**
 * Drains raw tweets from the MySQL `json_cache` table, reduces each one to a
 * fixed set of fields and bulk-indexes the result through
 * ElasticConnectorForTweetStore.
 *
 * Only tweets whose `lang` is 'en' or 'pt' and that carry a user object are
 * indexed; every fetched row is removed from the cache regardless.
 */
class ParseTweets {
    /** Seconds to pause at the end of every parseFromCache() cycle. */
    const POLL_INTERVAL_SECONDS = 60;

    /** @var db Database wrapper loaded from db_lib.php. */
    private $mysqlObj;

    /** @var ElasticConnectorForTweetStore|null Null while no connection could be established. */
    private $esObj;

    /** @var string File that log_error() appends to (relative to the working directory). */
    private $errorLogFile = 'parse_error_log.txt';

    /**
     * Opens the database wrapper and attempts the Elasticsearch connection.
     *
     * A constructor's return value is ignored by `new`, so a failed connection
     * cannot be signalled by returning null. Instead the failure is logged,
     * $esObj stays null and parseFromCache() retries the connection later.
     */
    public function __construct() {
        require_once('db_lib.php');
        $this->mysqlObj = new db;

        // Set before connecting so the timezone is UTC even when the
        // connection attempt fails and an error has to be logged.
        date_default_timezone_set('UTC');

        $this->connectElastic();
    }

    /**
     * Runs one polling cycle: fetch up to 10000 cached tweets, delete them
     * from the cache, index the eligible ones, then sleep.
     *
     * When Elasticsearch is unavailable or the SELECT fails, the cache is left
     * untouched and the method only sleeps, so nothing is deleted that cannot
     * be indexed.
     *
     * @return void
     */
    public function parseFromCache() {
        // Without a connector every fetched row would be deleted and then the
        // first getDocObj() call would be fatal; retry the connection instead.
        if ($this->esObj === null && !$this->connectElastic()) {
            sleep(self::POLL_INTERVAL_SECONDS);
            return;
        }

        $query = 'SELECT cache_id, raw_tweet ' .
                 'FROM json_cache LIMIT 10000';
        $result = $this->mysqlObj->select($query);

        // mysqli_fetch_assoc() only accepts a mysqli_result.
        if (!($result instanceof mysqli_result)) {
            $this->log_error('json_cache query did not return a result set');
            sleep(self::POLL_INTERVAL_SECONDS);
            return;
        }

        $docsToInsert = array();
        while ($row = mysqli_fetch_assoc($result)) {
            $cache_id = $row['cache_id'];

            // NOTE(review): unserialize() must only ever see trusted data;
            // confirm that nothing but our own collector writes raw_tweet.
            $tweet_object = unserialize(base64_decode($row['raw_tweet']));

            // NOTE(review): the row is deleted before it is indexed, so a
            // failing bulkInsert() below loses this batch. Kept as in the
            // original; confirm that at-most-once delivery is intended.
            $this->mysqlObj->select("DELETE FROM json_cache WHERE cache_id = $cache_id");

            // Corrupt payloads unserialize to a non-object, for which
            // field() yields null, so they are skipped here as well.
            $lang = $this->field($tweet_object, 'lang');
            if ($lang !== 'en' && $lang !== 'pt') {
                continue;
            }

            //prepare tweet object
            $tobj = $this->getTweet($tweet_object);
            if (isset($tobj['user'])) {
                $docsToInsert[] = $this->esObj->getDocObj($this->field($tweet_object, 'id'), $tobj);
            }
        }

        if (count($docsToInsert) > 0) {
            try {
                $this->esObj->bulkInsert($docsToInsert);
            } catch (Exception $e) {
                $this->log_error($this->describeException($e));
            }
        }

        sleep(self::POLL_INTERVAL_SECONDS);
    }

    /**
     * Tries to create the Elasticsearch connector.
     *
     * @return bool True when $esObj now holds a connector; false when the
     *              attempt threw (the exception chain is logged).
     */
    private function connectElastic() {
        try {
            $this->esObj = new ElasticConnectorForTweetStore();
            return true;
        } catch (Exception $e) {
            $this->esObj = null;
            $this->log_error($this->describeException($e));
            return false;
        }
    }

    /**
     * Renders an exception and all of its previous exceptions, one per line.
     *
     * @param Exception $e Outermost exception.
     * @return string "file: line message (code), class\r\n" for each link in the chain.
     */
    private function describeException($e) {
        $log = '';
        do {
            $log .= "{$e->getFile()}: {$e->getLine()} {$e->getMessage()} ({$e->getCode()}), " . get_class($e) . "\r\n";
        } while ($e = $e->getPrevious());
        return $log;
    }

    /**
     * Reads a property without raising a warning when it is missing.
     *
     * @param mixed  $source Object to read from; anything else yields null.
     * @param string $name   Property name.
     * @return mixed The property value, or null when it is absent or null.
     */
    private function field($source, $name) {
        return isset($source->$name) ? $source->$name : null;
    }

    /**
     * Reduces a raw tweet object to the fields that get indexed.
     *
     * Recurses into `retweeted_status` when that property holds an object.
     *
     * @param object $tweet_object Raw tweet.
     * @return array Field name => value; fields that are missing or of the
     *               wrong type are null.
     */
    private function getTweet($tweet_object) {
        $retweeted_status = null;
        $retweet = $this->field($tweet_object, 'retweeted_status');
        if (is_object($retweet)) {
            $retweeted_status = $this->getTweet($retweet);
        }

        return array(
            "created_at" => $this->getDate($this->field($tweet_object, 'created_at')),
            "favorite_count" => $this->getInt($this->field($tweet_object, 'favorite_count')),
            "id" => $this->getInt($this->field($tweet_object, 'id')),
            "id_str" => $this->getStr($this->field($tweet_object, 'id_str')),
            "in_reply_to_status_id_str" => $this->getStr($this->field($tweet_object, 'in_reply_to_status_id_str')),
            "in_reply_to_user_id_str" => $this->getStr($this->field($tweet_object, 'in_reply_to_user_id_str')),
            "lang" => $this->getStr($this->field($tweet_object, 'lang')),
            "retweet_count" => $this->getInt($this->field($tweet_object, 'retweet_count')),
            "text" => $this->getStr($this->field($tweet_object, 'text')),
            "retweeted_status" => $retweeted_status,
            "user" => $this->getUser($this->field($tweet_object, 'user'))
        );
    }

    /**
     * Reduces a raw user object to the fields that get indexed.
     *
     * @param object|null $user Raw user.
     * @return array|null Field name => value, or null when $user is not an object.
     */
    private function getUser($user) {
        if (!is_object($user)) {
            return null;
        }

        return array(
            "created_at" => $this->getDate($this->field($user, 'created_at')),
            "favourites_count" => $this->getInt($this->field($user, 'favourites_count')),
            "followers_count" => $this->getInt($this->field($user, 'followers_count')),
            "friends_count" => $this->getInt($this->field($user, 'friends_count')),
            "id" => $this->getInt($this->field($user, 'id')),
            "id_str" => $this->getStr($this->field($user, 'id_str')),
            "listed_count" => $this->getInt($this->field($user, 'listed_count')),
            "location" => $this->getStr($this->field($user, 'location')),
            "name" => $this->getStr($this->field($user, 'name')),
            "profile_image_url" => $this->getStr($this->field($user, 'profile_image_url')),
            "screen_name" => $this->getStr($this->field($user, 'screen_name')),
            "statuses_count" => $this->getInt($this->field($user, 'statuses_count')),
            "time_zone" => $this->getStr($this->field($user, 'time_zone')),
            "verified" => $this->getBool($this->field($user, 'verified')),
        );
    }

    /**
     * @param mixed $value
     * @return int|null $value when it is an integer, otherwise null.
     */
    private function getInt($value) {
        return is_int($value) ? $value : null;
    }

    /**
     * @param mixed $value
     * @return string|null $value when it is a string, otherwise null.
     */
    private function getStr($value) {
        return is_string($value) ? $value : null;
    }

    /**
     * @param mixed $value
     * @return bool|null $value when it is a boolean, otherwise null.
     */
    private function getBool($value) {
        return is_bool($value) ? $value : null;
    }

    /**
     * Converts a date string to "Y-m-d H:i:s" in UTC.
     *
     * @param mixed $value Date string understood by strtotime().
     * @return string|null Formatted date, or null when $value is not a string
     *                     or cannot be parsed.
     */
    private function getDate($value) {
        if (!is_string($value)) {
            return null;
        }

        // strtotime() returns false on failure; formatting that would
        // silently produce 1970-01-01 00:00:00.
        $timestamp = strtotime($value);
        if ($timestamp === false) {
            return null;
        }

        // gmdate() equals date() under the UTC default set in the
        // constructor, without depending on that setting.
        return gmdate("Y-m-d H:i:s", $timestamp);
    }

    /**
     * Appends a timestamped line to the error log file.
     *
     * Falls back to PHP's own error log when the file cannot be written.
     *
     * @param string $msg Message to record.
     * @return void
     */
    private function log_error($msg) {
        $script = isset($_SERVER["SCRIPT_NAME"]) ? $_SERVER["SCRIPT_NAME"] : '';
        $line = date(DATE_RFC822) . ' | ' .
                $script . ' -> ' . $msg . "\n";

        if (file_put_contents($this->errorLogFile, $line, FILE_APPEND | LOCK_EX) === false) {
            error_log($line);
        }
    }
}
?>