Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,3 @@ abstract public function getCookiesForUrl($target_url);
*/
abstract public function cleanup();
}
?>
Original file line number Diff line number Diff line change
Expand Up @@ -5,89 +5,88 @@
* @package phpcrawl
* @internal
*/
class PHPCrawlerMemoryCookieCache extends PHPCrawlerCookieCacheBase
{
protected $cookies = array();

/**
* Adds a cookie to the cookie-cache.
*
* @param PHPCrawlerCookieDescriptor $Cookie The cookie to add.
*/
public function addCookie(PHPCrawlerCookieDescriptor $Cookie)
{
    // A cookie is identified inside its source-domain bucket by the
    // combination of cookie-domain, path and name; storing a cookie with
    // the same combination again overwrites the earlier entry.
    $identity = array($Cookie->domain, $Cookie->path, $Cookie->name);
    $bucket_key = md5(implode("_", $identity));

    $this->cookies[$Cookie->source_domain][$bucket_key] = $Cookie;
}

/**
* Adds a bunch of cookies to the cookie-cache.
*
* @param array $cookies Numeric array containing the cookies to add as PHPCrawlerCookieDescriptor-objects
*/
public function addCookies($cookies)
{
for ($x=0; $x<count($cookies); $x++)
{
$this->addCookie($cookies[$x]);
class PHPCrawlerMemoryCookieCache extends PHPCrawlerCookieCacheBase {

protected $cookies = array();

/**
* Adds a cookie to the cookie-cache.
*
* @param PHPCrawlerCookieDescriptor $Cookie The cookie to add.
*/
public function addCookie(PHPCrawlerCookieDescriptor $Cookie) {
    // Hash of domain, path and name: the key under which this cookie is
    // kept, so a later cookie with the same triple replaces this one.
    $slot = md5($Cookie->domain . "_" . $Cookie->path . "_" . $Cookie->name);

    // Cookies are grouped by the domain they were received from.
    $origin = $Cookie->source_domain;
    $this->cookies[$origin][$slot] = $Cookie;
}
}

/**
* Returns all cookies from the cache that are addressed to the given URL
*
* @param string $target_url The target-URL
* @return array Numeric array containing all matching cookies as PHPCrawlerCookieDescriptor-objects
*/
public function getCookiesForUrl($target_url)
{
$url_parts = PHPCrawlerUtils::splitURL($target_url);

$target_domain = $url_parts["domain"]; // e.g. acme.com

$return_cookies = array();

// Iterate over all cookies of this domain
@reset($this->cookies[$target_domain]);
while (list($hash) = @each($this->cookies[$target_domain]))
{
$Cookie = $this->cookies[$target_domain][$hash];

// Does the cookie-domain match?
// Tail-matching, see http://curl.haxx.se/rfc/cookie_spec.html:
// A domain attribute of "acme.com" would match host names "anvil.acme.com" as well as "shipping.crate.acme.com"
// Seems like ".acme.com" should also match "anvil.acme.com", so just remove the dot

$Cookie->domain = preg_replace("#^.#", "", $Cookie->domain);

if ($Cookie->domain == $url_parts["host"] || preg_match("#".preg_quote($Cookie->domain)."$#", $url_parts["host"]))
{
// Does the path match?
if (preg_match("#^".preg_quote($Cookie->path)."#", $url_parts["path"]))
{
$return_cookies[$Cookie->name] = $Cookie; // Use cookie-name as index to avoid double-cookies

/**
* Adds a bunch of cookies to the cookie-cache.
*
* @param array $cookies Numeric array containing the cookies to add as PHPCrawlerCookieDescriptor-objects
*/
public function addCookies($cookies) {
    // foreach visits every element whatever its key is, so arrays with
    // gaps or non-zero-based keys are handled too, and count() is no
    // longer re-evaluated on every iteration.
    foreach ($cookies as $Cookie) {
        $this->addCookie($Cookie);
    }
}
}

// Convert to numeric array
$return_cookies = array_values($return_cookies);

return $return_cookies;
}

/**
* Cleans up the cache after it is not needed anymore.
*/
public function cleanup()
{
    // Drop every cached cookie by resetting the store to an empty array.
    $this->cookies = [];
}

/**
* Returns all cookies from the cache that are addressed to the given URL
*
* @param string $target_url The target-URL
* @return array Numeric array containing all matching cookies as PHPCrawlerCookieDescriptor-objects
*/
public function getCookiesForUrl($target_url) {
    $url_parts = PHPCrawlerUtils::splitURL($target_url);

    $target_domain = $url_parts["domain"]; // e.g. acme.com

    // No cookie was ever stored for this domain, so nothing can match.
    if (!isset($this->cookies[$target_domain])) {
        return array();
    }

    $return_cookies = array();

    // Iterate over all cookies of this domain. The cookie objects are the
    // array values, so they are taken directly instead of being looked up
    // again through their hash key.
    foreach ($this->cookies[$target_domain] as $Cookie) {
        // Does the cookie-domain match?
        // Tail-matching, see http://curl.haxx.se/rfc/cookie_spec.html:
        // A domain attribute of "acme.com" would match host names "anvil.acme.com" as well as "shipping.crate.acme.com"
        // Seems like ".acme.com" should also match "anvil.acme.com", so just remove the dot
        // (the stripped domain is written back to the cached cookie object).
        $Cookie->domain = preg_replace("#^\.#", "", $Cookie->domain);

        // The pattern delimiter is passed to preg_quote() so that a "#"
        // inside the cookie data cannot terminate the pattern early.
        $domain_matches = $Cookie->domain == $url_parts["host"]
            || preg_match("#" . preg_quote($Cookie->domain, "#") . "$#", $url_parts["host"]);

        if (!$domain_matches) {
            continue;
        }

        // Does the path match? The cookie path has to be a prefix of the URL path.
        if (preg_match("#^" . preg_quote($Cookie->path, "#") . "#", $url_parts["path"])) {
            $return_cookies[$Cookie->name] = $Cookie; // Use cookie-name as index to avoid double-cookies
        }
    }

    // Convert to numeric array
    return array_values($return_cookies);
}

/**
* Cleans up the cache after it is not needed anymore.
*/
public function cleanup() {
    // Forget all cookies collected so far.
    $this->cookies = [];
}

}
?>
Original file line number Diff line number Diff line change
Expand Up @@ -166,4 +166,3 @@ public function cleanup()
unlink($this->sqlite_db_file);
}
}
?>
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,3 @@ class PHPCrawlerAbortReasons
*/
const ABORTREASON_USERABORT = 4;
}
?>
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,3 @@ class PHPCrawlerHTTPProtocols
*/
const HTTP_1_1 = 2;
}
?>
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,3 @@ class PHPCrawlerMultiProcessModes
*/
const MPMODE_CHILDS_EXECUTES_USERCODE = 2;
}
?>
Loading