Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Add curl library to support multiple proxies #477

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 32 additions & 22 deletions bridges/FacebookBridge.php
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
<?php
require __DIR__ . '/../lib/contents_curl.php';
class FacebookBridge extends BridgeAbstract {

const MAINTAINER = 'teromene';
Expand Down Expand Up @@ -87,6 +88,7 @@ function extractFromDelimiters($string, $start, $end){
$captcha_action = $_SESSION['captcha_action'];
$captcha_fields = $_SESSION['captcha_fields'];
$captcha_fields['captcha_response'] = preg_replace("/[^a-zA-Z0-9]+/", "", $_POST['captcha_response']);
/*
$http_options = array(
'http' => array(
'method' => 'POST',
Expand All @@ -98,25 +100,29 @@ function extractFromDelimiters($string, $start, $end){
);
$context = stream_context_create($http_options);
$html = getContents($captcha_action, false, $context);
*/
list($html, $info, $res_header, $proxy) = curlgetContents($captcha_action, $captcha_fields, true);
if ( $info['http_code'] != 200 )
returnServerError('Error '.$info['http_code'].$captcha_action."\nReq:\n".$res_header."\nResp:\n".$info['request_header']."\nProxy:\n".$proxy);

if($html === false){
returnServerError('Failed to submit captcha response back to Facebook');
}
unset($_SESSION['captcha_fields']);
$html = str_get_html($html);
}
unset($_SESSION['captcha_fields']);
unset($_SESSION['captcha_action']);
}

$res_header = '';
$proxy = '';
//Retrieve page contents
if(is_null($html)){
if(!strpos($this->getInput('u'), "/")){
$html = getSimpleHTMLDOM(self::URI . urlencode($this->getInput('u')) . '?_fb_noscript=1')
or returnServerError('No results for this query.');
if (is_null($html)) {
if (!strpos($this->getInput('u'), "/")) {
list($html, $info, $res_header, $proxy) = curlgetSimpleHTMLDOM(self::URI.urlencode($this->getInput('u')).'?_fb_noscript=1');
if ( $info['http_code'] != 200 )
returnServerError('Error '.print_r($info, true)."\nResp:\n".$res_header."\nReq:\n".$info['request_header']."\nProxy:\n".$proxy);
} else {
$html = getSimpleHTMLDOM(self::URI . 'pages/' . $this->getInput('u') . '?_fb_noscript=1')
or returnServerError('No results for this query.');
list($html, $info) = curlgetSimpleHTMLDOM(self::URI.'pages/'.$this->getInput('u').'?_fb_noscript=1');
if ( $info['http_code'] != 200 ) returnServerError('No results for this query.');
}
}

Expand Down Expand Up @@ -145,6 +151,8 @@ function extractFromDelimiters($string, $start, $end){
<p><img src="data:image/png;base64,{$img}" /></p>
<p><b>Response:</b> <input name="captcha_response" placeholder="please fill in" />
<input type="submit" value="Submit!" /></p>
<pre>{$res_header}</pre>
<p>Proxy: <pre>{$proxy}</pre></p>
</form>
EOD;
die($message);
Expand Down Expand Up @@ -172,6 +180,18 @@ function extractFromDelimiters($string, $start, $end){
// Ignore summary posts
if(strpos($post->class, '_3xaf') !== false) continue;

// Determine post attachments
/*
$attachment_wrapper = $post->find('._3x-2')[0];// search for attachment
if ( isset($attachment_wrapper) ) {
$attachment = $attachment_wrapper->find('.mtm')[0]->children(0);
if ( strpos($attachment->class, '_2a2q') !== false ) {
// photos
} elseif ( strpos($attachment->class, '_6m2') !== false ) {
// link
}
}*/

$item = array();

if(count($post->find('abbr')) > 0){
Expand Down Expand Up @@ -236,13 +256,8 @@ function extractFromDelimiters($string, $start, $end){
$date = 0;
}

//Build title from username and content
$title = $author;
if(strlen($title) > 24)
$title = substr($title, 0, strpos(wordwrap($title, 24), "\n")) . '...';
$title = $title . ' | ' . strip_tags($content);
if(strlen($title) > 64)
$title = substr($title, 0, strpos(wordwrap($title, 64), "\n")) . '...';
//Build title from content
$title = mb_substr(strip_tags($post->find('.userContent > p')[0]->innertext), 0, 20).'...';

//Build and add final item
$item['uri'] = self::URI . $post->find('abbr')[0]->parent()->getAttribute('href');
Expand All @@ -257,11 +272,6 @@ function extractFromDelimiters($string, $start, $end){
}

/**
 * Returns the feed name.
 *
 * When an author name has been collected, the name is either the explicit
 * `extraInfos['name']` value or "<author> - Facebook Bridge". Otherwise the
 * parent implementation decides the name.
 *
 * @return string
 */
public function getName(){
    if(!empty($this->authorName)){
        // Parenthesised to make the precedence explicit: the suffix is only
        // appended to the author name, never to extraInfos['name'].
        return isset($this->extraInfos['name'])
            ? $this->extraInfos['name']
            : ($this->authorName . ' - Facebook Bridge');
    }

    // No author known (e.g. nothing collected yet): fall back to the parent
    // name. A second, unconditional return that used to follow this line was
    // unreachable and has been removed.
    return parent::getName();
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please split this file into a separate PR

79 changes: 79 additions & 0 deletions lib/contents_curl.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
<?php
/**
 * Fetches a URL with cURL, optionally through a randomly chosen proxy.
 *
 * Proxies are read from every regular file in `../proxylist/` (one proxy per
 * line, blank lines ignored). When no proxy is configured - for example when
 * the directory only contains `.gitkeep` - the request is sent directly.
 *
 * @param string       $url    URL to request.
 * @param array|object $params Query parameters (GET) or form fields (POST).
 *                             Anything else is treated as "no parameters".
 * @param bool         $post   Send $params as an url-encoded POST body
 *                             instead of appending them to the URL.
 *
 * @return array array($body, $info, $header, $proxy) where
 *               - $body   is the response body, or false if the transfer failed,
 *               - $info   is always the curl_getinfo() array ('http_code' and
 *                         'request_header' are guaranteed to exist); on failure
 *                         it additionally carries 'errno' and 'error',
 *               - $header is the raw response header block ('' on failure),
 *               - $proxy  is the proxy that was used ('' for a direct request).
 */
function curlgetContents($url, $params = array(), $post = false){
    // http_build_query() only accepts arrays/objects; callers may pass false.
    $query = (is_array($params) || is_object($params)) ? http_build_query($params) : '';

    if(!$post && $query !== ''){
        // Respect a query string that is already part of the URL.
        $url .= (strpos($url, '?') === false ? '?' : '&') . $query;
    }

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    #curl_setopt($ch, CURLOPT_COOKIEJAR, '/tmp/rssbridge-fb-cookies.txt');
    #curl_setopt($ch, CURLOPT_COOKIEFILE, '/tmp/rssbridge-fb-cookies.txt');

    // Collect proxies from all list files. Directories and unreadable files
    // are skipped, and the last line is kept even without a trailing newline.
    $proxies = array();
    $proxy_dir = __DIR__ . '/../proxylist/';
    $files = is_dir($proxy_dir) ? scandir($proxy_dir) : false;
    if($files !== false){
        foreach($files as $file){
            $path = $proxy_dir . $file;
            if(!is_file($path)){
                continue;
            }

            $lines = file($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
            if($lines === false){
                continue;
            }

            foreach($lines as $line){
                $line = trim($line); // also strips "\r" from CRLF files
                if($line !== ''){
                    $proxies[] = $line;
                }
            }
        }
    }

    // array_rand() must not be called on an empty array, so a proxy is only
    // configured when at least one is available.
    $proxy = '';
    if(!empty($proxies)){
        $proxy = $proxies[array_rand($proxies)];
        curl_setopt($ch, CURLOPT_PROXY, $proxy);
    }

    curl_setopt($ch, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
    curl_setopt($ch, CURLOPT_HEADER, 1);
    // Makes the outgoing request header available as $info['request_header'].
    // CURLOPT_VERBOSE was dropped: its log was never captured (the stream it
    // was read from did not exist) and only produced noise on stderr.
    curl_setopt($ch, CURLINFO_HEADER_OUT, 1);

    $headers = array('User-Agent: ' . ini_get('user_agent'));
    if($post){
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $query);
        array_unshift($headers, 'Content-Type: application/x-www-form-urlencoded');
    }
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

    $response = curl_exec($ch);

    // Keep $info an array in every case: callers index $info['http_code'].
    $info = curl_getinfo($ch);
    if(!is_array($info)){
        $info = array();
    }
    $info += array('http_code' => 0, 'request_header' => '');

    $errno = curl_errno($ch);
    if($response === false || $errno !== 0){
        $error_message = curl_strerror($errno);
        $info['errno'] = $errno;
        $info['error'] = "cURL error ({$errno}):\n {$error_message}";
        $header = '';
        $body = false;
    } else {
        // CURLOPT_HEADER prepends the header block to the body; split them.
        $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
        $header = (string)substr($response, 0, $header_size);
        $body = (string)substr($response, $header_size);
    }

    curl_close($ch);
    #file_put_contents(__DIR__.'/../debug/D'.date('H-i-s').'.html', $body);

    return array($body, $info, $header, $proxy);
}
/**
 * Fetches a URL through curlgetContents() and parses the body with
 * str_get_html().
 *
 * The parameter list mirrors getSimpleHTMLDOM() so the function can be used
 * as a drop-in replacement. $use_include_path, $context, $offset and $maxLen
 * have no cURL equivalent here and are accepted but ignored; previously they
 * were forwarded to curlgetContents(), which interpreted them as request
 * parameters and the POST flag.
 *
 * @return array array($dom, $info, $header, $proxy) where $dom is the parsed
 *               document or false when the request failed or the body was
 *               empty. $info, $header and $proxy are passed through from
 *               curlgetContents() unchanged so callers can report the HTTP
 *               status, the response headers and the proxy in error messages.
 */
function curlgetSimpleHTMLDOM($url
, $use_include_path = false
, $context = null
, $offset = 0
, $maxLen = null
, $lowercase = true
, $forceTagsClosed = true
, $target_charset = DEFAULT_TARGET_CHARSET
, $stripRN = true
, $defaultBRText = DEFAULT_BR_TEXT
, $defaultSpanText = DEFAULT_SPAN_TEXT
){
    // Plain GET without extra parameters; the URL already carries its query.
    list($body, $info, $header, $proxy) = curlgetContents($url, array(), false);

    // Only hand real content to the parser; a failed transfer yields false.
    $dom = false;
    if(is_string($body) && $body !== ''){
        $dom = str_get_html($body
            , $lowercase
            , $forceTagsClosed
            , $target_charset
            , $stripRN
            , $defaultBRText
            , $defaultSpanText);
    }

    return array($dom, $info, $header, $proxy);
}
?>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please have a look at the Travis-CI results. Also check out the new contribution guidelines regarding coding style.

Empty file added proxylist/.gitkeep
Empty file.