diff --git a/analysis/common/functions.php b/analysis/common/functions.php index d7d50942..89c3cb3b 100644 --- a/analysis/common/functions.php +++ b/analysis/common/functions.php @@ -866,7 +866,7 @@ function current_collation() { $rec = $dbh->prepare($sql); $rec->execute(); while ($res = $rec->fetch(PDO::FETCH_ASSOC)) { - if (array_key_exists('Collation', $res) && ($res['Collation'] == 'utf8mb4_unicode_ci' || $res['Collation'] == 'utf8mb4_general_ci')) { + if (array_key_exists('Collation', $res) && substr($res['Collation'], 0, 7) === 'utf8mb4') { $is_utf8mb4 = true; break; } diff --git a/capture/common/form.trackphrases.php b/capture/common/form.trackphrases.php index d8ee7a32..ed3b0d78 100644 --- a/capture/common/form.trackphrases.php +++ b/capture/common/form.trackphrases.php @@ -19,6 +19,9 @@
  • exact phrases: ['global warming'] will get only tweets with the exact phrase. Beware, however that due to how the streaming API works, tweets are captured in the same way as in 2, but tweets that do not match the exact phrase are thrown away. This means that you will request many more tweets from the Twitter API than you will see in your query bin - thus increasing the possibility that you will hit a rate limit. E.g. if you specify a query like ['are we'] all tweets matching both [are] and [we] are retrieved, while DMI-TCAT only retains those with the exact phrase ['are we'].
  • + Each phrase between commas can be at most 60 characters long. +
    + You can track a maximum of 400 queries at the same time (for all query bins combined) and the total volume should never exceed 1% of global Twitter volume, at any specific moment in time.
    Example bin: globalwarming,global warming,'climate change' diff --git a/capture/index.php b/capture/index.php index f430a723..7c2c60b6 100644 --- a/capture/index.php +++ b/capture/index.php @@ -762,6 +762,11 @@ function validateQuery(query,type) { return false; } if(type == 'track') { + if(query.split(',').some(subq => subq.length > 60)) { + alert("Query phrases should not exceed 60 characters each. Please shorten your query phrases."); + return false; + }; + // if literal phrase, there should be no comma's in between if(query.indexOf("'")==-1) { return true; diff --git a/capture/query_manager.php b/capture/query_manager.php index 2ff85cd1..cd45f7ef 100644 --- a/capture/query_manager.php +++ b/capture/query_manager.php @@ -52,6 +52,17 @@ function create_new_bin($params) { echo '{"msg":"This capturing type is not defined in the config file"}'; return; } + if($type == 'track') { + $phrases = explode(",", $params["newbin_phrases"]); + $phrases = array_trim_and_unique($phrases); + foreach($phrases as $phrase) { + if(strlen($phrase) > 60) { + echo '{"msg":"Cannot add query because a phrase is too long."}'; + throw new LengthException('A query phrase exceeds 60 chrs.'); + return; + } + } + } $comments = sanitize_comments($params['newbin_comments']); // check whether the main query management tables are there, if not, create @@ -182,71 +193,77 @@ function remove_bin($params) { $bin_name = $results['querybin']; } - // delete tcat_query_bin table - $sql = "DELETE FROM tcat_query_bins WHERE id = :id"; - $delete_querybin = $dbh->prepare($sql); - $delete_querybin->bindParam(':id', $bin_id, PDO::PARAM_INT); - $delete_querybin->execute(); - - // delete periods associated with the query bin - $sql = "DELETE FROM tcat_query_bins_periods WHERE querybin_id = :id"; - $delete_querybin_periods = $dbh->prepare($sql); - $delete_querybin_periods->bindParam(':id', $bin_id, PDO::PARAM_INT); - $delete_querybin_periods->execute(); - - // delete phrase references associated with the query 
bin - $sql = "DELETE FROM tcat_query_bins_phrases WHERE querybin_id = :id"; - $delete_query_bins_phrases = $dbh->prepare($sql); - $delete_query_bins_phrases->bindParam(":id", $bin_id, PDO::PARAM_INT); - $delete_query_bins_phrases->execute(); - - // delete orphaned phrases - $sql = "DELETE FROM tcat_query_phrases where id not in ( select phrase_id from tcat_query_bins_phrases )"; - $delete_query_phrases = $dbh->prepare($sql); - $delete_query_phrases->execute(); - - // delete user references associated with the query bin - $sql = "DELETE FROM tcat_query_bins_users WHERE querybin_id = :id"; - $delete_query_bins_users = $dbh->prepare($sql); - $delete_query_bins_users->bindParam(":id", $bin_id, PDO::PARAM_INT); - $delete_query_bins_users->execute(); - - // delete orphaned users - $sql = "DELETE FROM tcat_query_users where id not in ( select user_id from tcat_query_bins_users )"; - $delete_query_users = $dbh->prepare($sql); - $delete_query_users->execute(); - - $sql = "DROP TABLE " . $bin_name . "_tweets"; - $delete_table = $dbh->prepare($sql); - $delete_table->execute(); - - $sql = "DROP TABLE " . $bin_name . "_mentions"; - $delete_table = $dbh->prepare($sql); - $delete_table->execute(); - - $sql = "DROP TABLE " . $bin_name . "_hashtags"; - $delete_table = $dbh->prepare($sql); - $delete_table->execute(); - - $sql = "DROP TABLE " . $bin_name . "_urls"; - $delete_table = $dbh->prepare($sql); - $delete_table->execute(); - - $sql = "DROP TABLE " . $bin_name . "_withheld"; - $delete_table = $dbh->prepare($sql); - $delete_table->execute(); - - $sql = "DROP TABLE " . $bin_name . "_places"; - $delete_table = $dbh->prepare($sql); - $delete_table->execute(); - - $sql = "DROP TABLE " . $bin_name . "_media"; - $delete_table = $dbh->prepare($sql); - $delete_table->execute(); - - echo '{"msg":"Query bin [' . $bin_name . 
']has been deleted"}'; - - $dbh = false; + $dbh->beginTransaction(); + try { + // delete tcat_query_bin table + $sql = "DELETE FROM tcat_query_bins WHERE id = :id"; + $delete_querybin = $dbh->prepare($sql); + $delete_querybin->bindParam(':id', $bin_id, PDO::PARAM_INT); + $delete_querybin->execute(); + + // delete periods associated with the query bin + $sql = "DELETE FROM tcat_query_bins_periods WHERE querybin_id = :id"; + $delete_querybin_periods = $dbh->prepare($sql); + $delete_querybin_periods->bindParam(':id', $bin_id, PDO::PARAM_INT); + $delete_querybin_periods->execute(); + + // delete phrase references associated with the query bin + $sql = "DELETE FROM tcat_query_bins_phrases WHERE querybin_id = :id"; + $delete_query_bins_phrases = $dbh->prepare($sql); + $delete_query_bins_phrases->bindParam(":id", $bin_id, PDO::PARAM_INT); + $delete_query_bins_phrases->execute(); + + // delete orphaned phrases + $sql = "DELETE FROM tcat_query_phrases where id not in ( select phrase_id from tcat_query_bins_phrases )"; + $delete_query_phrases = $dbh->prepare($sql); + $delete_query_phrases->execute(); + + // delete user references associated with the query bin + $sql = "DELETE FROM tcat_query_bins_users WHERE querybin_id = :id"; + $delete_query_bins_users = $dbh->prepare($sql); + $delete_query_bins_users->bindParam(":id", $bin_id, PDO::PARAM_INT); + $delete_query_bins_users->execute(); + + // delete orphaned users + $sql = "DELETE FROM tcat_query_users where id not in ( select user_id from tcat_query_bins_users )"; + $delete_query_users = $dbh->prepare($sql); + $delete_query_users->execute(); + + $sql = "DROP TABLE " . $bin_name . "_tweets"; + $delete_table = $dbh->prepare($sql); + $delete_table->execute(); + + $sql = "DROP TABLE " . $bin_name . "_mentions"; + $delete_table = $dbh->prepare($sql); + $delete_table->execute(); + + $sql = "DROP TABLE " . $bin_name . "_hashtags"; + $delete_table = $dbh->prepare($sql); + $delete_table->execute(); + + $sql = "DROP TABLE " . 
$bin_name . "_urls"; + $delete_table = $dbh->prepare($sql); + $delete_table->execute(); + + $sql = "DROP TABLE " . $bin_name . "_withheld"; + $delete_table = $dbh->prepare($sql); + $delete_table->execute(); + + $sql = "DROP TABLE " . $bin_name . "_places"; + $delete_table = $dbh->prepare($sql); + $delete_table->execute(); + + $sql = "DROP TABLE " . $bin_name . "_media"; + $delete_table = $dbh->prepare($sql); + $delete_table->execute(); + + if ($dbh->inTransaction()) $dbh->commit(); + + echo '{"msg":"Query bin [' . $bin_name . ']has been deleted"}'; + } catch (PDOException $e) { + error_log("Unable to remove bin '" . $bin_name . "': " . $e->getMessage()); + if ($dbh->inTransaction()) $dbh->rollBack(); + } } function pause_bin($params) { @@ -438,6 +455,19 @@ function modify_bin_comments($querybin_id, $params) { function modify_bin($params) { global $captureroles, $now; + $type = $params['type']; + if($type == 'track') { + $phrases = explode(",", $params["newphrases"]); + $phrases = array_trim_and_unique($phrases); + foreach($phrases as $phrase) { + if(strlen($phrase) > 60) { + echo '{"msg":"Cannot add query because a phrase is too long."}'; + throw new LengthException('A query phrase exceeds 60 chrs.'); + return; + } + } + } + if (!table_id_exists($params["bin"])) { echo '{"msg":"The bin ' . $params['bin'] . ' does not seem to exist"}'; return; @@ -446,7 +476,6 @@ function modify_bin($params) { if (array_key_exists('comments', $params) && $params['comments'] !== '') return modify_bin_comments($querybin_id, $params); - $type = $params['type']; if (array_search($type, $captureroles) === false && ($type !== 'geotrack' || array_search('track', $captureroles) === false)) { echo '{"msg":"This capturing type is not defined in the config file"}'; return; @@ -785,9 +814,14 @@ function getBins() { $querybins[$bin->id]->nrOfTweets = 0; $sql = "SELECT count(id) AS count FROM " . $bin->name .
"_tweets"; $res = $dbh->prepare($sql); - if ($res->execute() && $res->rowCount()) { - $result = $res->fetch(); - $querybins[$bin->id]->nrOfTweets = $result['count']; + try { + if ($res->execute() && $res->rowCount()) { + $result = $res->fetch(); + $querybins[$bin->id]->nrOfTweets = $result['count']; + } + } catch (PDOException $e) { + error_log("Error retrieving tweet info for bin '" . $bin->name . "': " . $e->getMessage()); + unset($querybins[$bin->id]); } } $dbh = false; diff --git a/capture/search/search.php b/capture/search/search.php index 9df3e667..ad3aa05c 100644 --- a/capture/search/search.php +++ b/capture/search/search.php @@ -52,7 +52,7 @@ queryManagerCreateBinFromExistingTables($bin_name, $querybin_id, $type, explode("OR", $keywords)); -search($keywords); +while(search($keywords)); if ($tweetQueue->length() > 0) { $tweetQueue->insertDB(); } @@ -61,9 +61,10 @@ // TODO: see timeline.php for an improvement making it easier for users to start a bin immediatly after running a CLU script, and adept the method for this script -function search($keywords, $max_id = null) { +function search($keywords) { global $twitter_keys, $current_key, $ratefree, $bin_name, $dbh, $tweetQueue; - + static $max_id = null; + $ratefree--; if ($ratefree < 1 || $ratefree % 10 == 0) { $keyinfo = getRESTKey($current_key, 'search', 'tweets'); @@ -122,12 +123,12 @@ function search($keywords, $max_id = null) { return false; } sleep(1); - search($keywords, $max_id); + return true; } else { echo $tmhOAuth->response['response'] .
"\n"; if ($tmhOAuth->response['response']['errors']['code'] == 130) { // over capacity sleep(1); - search($keywords, $max_id); + return true; } } } diff --git a/common/functions.php b/common/functions.php index a74db564..045d2cf7 100644 --- a/common/functions.php +++ b/common/functions.php @@ -79,13 +79,21 @@ function controller_restart_roles($logtarget = "cli", $wait = false) { * Validates a given list of keywords, as entered as a parameter in capture/search/search.php for example */ function validate_capture_phrases($keywords) { + $valid = true; $illegal_chars = array( "\t", "\n", ";", "(", ")" ); foreach ($illegal_chars as $c) { if (strpos($keywords, $c) !== FALSE) { - return FALSE; + $valid = false; } } - return TRUE; + foreach ((explode(' OR ', $keywords)) as $keyword) { + $keyword = trim($keyword); + $keyword = preg_replace('/\s+/', ' ', $keyword); + if (strlen($keyword) > 60) { + $valid = false; + } + } + return $valid; } /**