Skip to content

Commit

Permalink
Add new curl function to limit how much of the file is returned (#2599)
Browse files Browse the repository at this point in the history
  • Loading branch information
janette authored and fmizzell committed Aug 9, 2018
1 parent db1cdd7 commit 7c0c20d
Showing 1 changed file with 40 additions and 2 deletions.
42 changes: 40 additions & 2 deletions modules/dkan/dkan_dataset/includes/getRemoteFileInfo.php
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,40 @@ public function getName() {
return NULL;
}

/**
 * Helper function - download at most $limit bytes of a remote resource.
 *
 * Used when the server doesn't support HTTP HEAD: the response (headers
 * included, because CURLOPT_HEADER is enabled) is streamed through a write
 * callback that stops the transfer once $limit bytes have been collected.
 *
 * @param string $url
 *   The URL to request.
 * @param int $limit
 *   Maximum number of response bytes (headers + body) to keep.
 *
 * @return mixed
 *   Whatever $this->parseRequestData() returns for the captured bytes, or
 *   FALSE when nothing was received.
 */
private function getPartialContent($url, $limit) {
  // Collected response bytes. Captured by reference so that the data is
  // available after curl_exec() whether or not the limit was reached;
  // responses shorter than $limit must not be discarded.
  $buffer = '';

  $writefn = function ($ch, $chunk) use ($limit, &$buffer) {
    $remaining = max(0, $limit - strlen($buffer));
    if (strlen($chunk) >= $remaining) {
      // Keep only what is needed to reach the limit, then abort: returning
      // a value different from the chunk length makes cURL stop the
      // transfer.
      $buffer .= (string) substr($chunk, 0, $remaining);
      return -1;
    }
    $buffer .= $chunk;
    return strlen($chunk);
  };

  $ch = curl_init();
  curl_setopt($ch, CURLOPT_URL, $url);
  curl_setopt($ch, CURLOPT_HEADER, 1);
  curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
  // The write callback receives all output, so CURLOPT_RETURNTRANSFER is
  // not needed and the return value of curl_exec() is not used. It is FALSE
  // when the callback aborts the transfer on purpose.
  curl_setopt($ch, CURLOPT_WRITEFUNCTION, $writefn);
  curl_exec($ch);
  curl_close($ch);

  if ($buffer !== '') {
    return $this->parseRequestData($buffer);
  }

  return FALSE;
}

/**
* Helper function.
*/
Expand All @@ -163,7 +197,6 @@ private function getFileInfoHelper($url, $no_body = TRUE) {
curl_setopt($ch, CURLOPT_NOBODY, 1);
}
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
curl_setopt($ch, CURLOPT_HTTPHEADER, array("Range: bytes=0-1000"));

$ok = curl_exec($ch);

Expand Down Expand Up @@ -191,7 +224,12 @@ private function getFileInfo($url) {
return $info;
}

if ($info = $this->getFileInfoHelper($url, FALSE)) {
// If the above did not work that means the server doesn't support HTTP HEAD,
// and more often than not the server does not honor the Range header.
// (i.e. curl_setopt($ch, CURLOPT_HTTPHEADER, array("Range: bytes=0-1000")))
// So we will need to download a portion of the file (500 bytes) to get the info.
// Downloading the entire file can cause the harvest to fail with out of memory errors.
if ($info = $this->getPartialContent($url, 500)) {
return $info;
}

Expand Down

0 comments on commit 7c0c20d

Please sign in to comment.