Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More verbosity and delayed retries #11

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 26 additions & 11 deletions mirror.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,18 @@
throw new \ErrorException($errstr, $errno, E_ERROR, $errfile, $errline);
});

/**
 * DateTime that is built from a mirror API timestamp and renders itself as
 * "Y-m-d H:i" when used in string context (e.g. concatenated into log output).
 *
 * The timestamps handed to this class are the values exchanged with the
 * /metadata/changes.json endpoint (its "timestamp" field and the stored
 * "since" value). Packagist expresses those in 10,000ths of a second, so the
 * value is reduced to whole seconds before being applied.
 * NOTE(review): the unit is taken from the Packagist API documentation, not
 * from this file - confirm if a different API backend is used.
 */
class DateTimeExt extends DateTime
{
    /** Number of API timestamp units per second (10,000ths of a second). */
    private const TICKS_PER_SECOND = 10000;

    /**
     * @param string $timestamp API timestamp (digits only); a non-numeric
     *                          value is treated as 0 (the Unix epoch) rather
     *                          than aborting what is only a log message.
     */
    public function __construct(string $timestamp)
    {
        // Start from "now" so the instance keeps the default timezone, then
        // move it to the requested instant. DateTime::createFromFormat() is a
        // static factory returning a *new* object, so calling it on $this and
        // dropping the result (as before) never changed this instance.
        parent::__construct();
        $this->setTimestamp(intdiv((int) $timestamp, self::TICKS_PER_SECOND));
    }

    /**
     * @return string the date formatted as "Y-m-d H:i" in the default timezone
     */
    public function __toString(): string
    {
        return $this->format('Y-m-d H:i');
    }
}

class Mirror {
private $target;
private $context;
Expand Down Expand Up @@ -78,12 +90,13 @@ public function syncRootOnV2()
$hash = hash('sha256', $rootData);

if ($hash === $this->getHash('/packages.json')) {
$this->output('No work - /packages.json hash unchanged' . PHP_EOL);
return;
}

$gzipped = gzencode($rootData, 8);
$this->write('/packages.json', $rootData, $gzipped, strtotime($rootResp->getHeaders()['last-modified'][0]));
$this->output('X');
$this->output(' X(P) ');

$this->statsdIncrement('mirror.sync_root');
}
Expand All @@ -95,6 +108,7 @@ public function getV2Timestamp(): int
$resp = $this->client->request('GET', $this->apiUrl.'/metadata/changes.json', ['headers' => ['Host' => parse_url($this->apiUrl, PHP_URL_HOST)]]);
$content = json_decode($resp->getContent(false), true);
if ($resp->getStatusCode() === 400 && null !== $content) {
$this->output('API Timestamp is '. new DateTimeExt($content['timestamp']).PHP_EOL);
return $content['timestamp'];
}
throw new \Exception('Failed to fetch timestamp from API, got invalid response '.$resp->getStatusCode().': '.$resp->getContent());
Expand All @@ -117,15 +131,15 @@ public function syncV2()
throw new \UnexpectedValueException('Cannot save last timestamp to last_metadata_timestamp in '.getcwd().'. Make sure the file is writable.');
}
$lastTime = trim(file_get_contents($this->getTimestampStorePath()));

$this->output('Last update was on '.new DateTimeExt($lastTime).PHP_EOL);
$changesResp = $this->client->request('GET', $this->apiUrl.'/metadata/changes.json?since='.$lastTime, ['headers' => ['Host' => parse_url($this->apiUrl, PHP_URL_HOST)]]);
if ($changesResp->getHeaders()['content-encoding'][0] !== 'gzip') {
throw new \Exception('Expected gzip encoded responses, something is off');
}
$changes = json_decode($changesResp->getContent(), true);

if ([] === $changes['actions']) {
$this->output('No work' . PHP_EOL);
$this->output('No changes since '. new DateTimeExt($lastTime) . PHP_EOL);
Stef-33560 marked this conversation as resolved.
Show resolved Hide resolved
$this->writeLastTimestamp($changes['timestamp']);
return true;
}
Expand All @@ -147,7 +161,6 @@ public function syncV2()
$this->delete($action['package']);
}
}

$result = $this->downloadV2Files($requests);
if (!$result) {
return false;
Expand All @@ -162,7 +175,7 @@ public function syncV2()

public function resync(int $timestamp)
{
$this->output('Resync requested'.PHP_EOL);
$this->output('Resync requested from '. new DateTimeExt($timestamp) .PHP_EOL);

$listingResp = $this->client->request('GET', $this->apiUrl.'/packages/list.json?'.md5(uniqid()), ['headers' => ['Host' => parse_url($this->apiUrl, PHP_URL_HOST)]]);
if ($listingResp->getHeaders()['content-encoding'][0] !== 'gzip') {
Expand Down Expand Up @@ -222,7 +235,7 @@ public function resync(int $timestamp)
$appendRequest('/p2/'.$pkg.'.json');
$appendRequest('/p2/'.$pkg.'~dev.json');
}

$result = $this->downloadV2Files($requests);
if (!$result) {
return false;
Expand Down Expand Up @@ -270,15 +283,16 @@ private function downloadV2Files(array $requests)

// got an outdated file, possibly fetched from a mirror which was not yet up to date, so retry after 2sec
if ($is404 || $mtime < $userData['minimumFilemtime']) {
if ($userData['retries'] > 2) {
// 404s after 3 retries should be deemed to have really been deleted, so we stop retrying
sleep($userData['retries']);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not necessary IMO. It adds a longer sleep, but with the increase to 10 retries we already get 10 × 2 sec of sleep, which should really be more than enough for everything to sync up upstream.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without this progressively increasing delay, even with 10 retries, my mirror fails.
I don't know why; maybe it is an issue with my ISP, with Cloudflare, or something like that?

I retried with another ISP; results:

git clone https://github.com/composer/mirror/
cat mirror.config.php
<?php

return [
    // directory where metadata files will get saved
    'target_dir' => './mymirror',
    // user agent describing your mirror node, if possible include domain name of mirror, and a contact email address
    'user_agent' => 'Just testing mirror script'/* TODO Mirror for foo.com (mycontact@example.org) */,
    // source repository URL
    'repo_url' => 'https://repo.packagist.org',
    // source repository hostname (optional, will guess from repo_url)
    //'repo_hostname' => 'repo.packagist.org',
    // source API URL
    'api_url' => 'https://repo.packagist.org',
    // how many times the script will run the mirroring step before exiting
    'iterations' => 1,
    // how many seconds to wait between mirror runs
    'iteration_interval' => 5,
    // set this to false if you do not run the --v1 mirror job, to ensure that the v2 will then take care of syncing packages.json
    'has_v1_mirror' => false,
];
./mirror.php --v2 -v

[...] lots of M [...]
[...] plenty of EEEEEEE [...]
Fatal error: Uncaught Symfony\Component\HttpClient\Exception\TransportException: Timeout was reached for "https://repo.packagist.org/p2/muxtor/yii2-pkk5-component.json". in /home/steph/composer-mirror/vendor/symfony/http-client/Response/CurlResponse.php:317

Result : 3.6Gb

With my PR, with the same config, no EEEE:

[...] lots of M [...]
[1]R[1]R[1]R[1]R[1]R[1]R[1]R[1]R[2]R[2]R[2]R[2]R[2]R[2]R[2]R[2]R[3]R[3]R[3]R[3]R[3]R[3]R[3]R[3]R[4]R[4]R[4]R[4]R[4]R[4]R[4]R[4]R[5]R[5]R[5]R[5]R[5]R[5]R[5]R[5]R[6]R[6]R[6]R[6]R[6]R[6]R[6]R[6]R[7]R[7]R[7]R[7]R[7]R[7]R[7]R[7]R[8]R[8]R[8]R[8]R[8]R[8]R[8]R[8]R[9]R[9]R[9]R[9]R[9]R[9]R[9]R[9]R[10]R[10]R[10]R[10]R[10]R[10]R[10]R[10]????????

Result : 6.2Gb

if ($userData['retries'] > 10) {
// 404s after 10 retries should be deemed to have really been deleted, so we stop retrying
if ($is404) {
return false;
}
throw new \Exception('Too many retries, could not update '.$userData['path'].' as the origin server returns an older file ('.$mtime.', expected '.$userData['minimumFilemtime'].')');
}
$hasRetries = true;
$this->output('R');
$this->output('R['.$userData['retries'].']');
$this->statsdIncrement('mirror.retry_provider_v2');
$userData['retries']++;
$headers = file_exists($this->target.$userData['path'].'.gz') ? ['If-Modified-Since' => gmdate('D, d M Y H:i:s T', filemtime($this->target.$userData['path'].'.gz'))] : [];
Expand Down Expand Up @@ -426,7 +440,7 @@ public function sync()
$hash = hash('sha256', $rootData);

if ($hash === $this->getHash('/packages.json')) {
$this->output('No work' . PHP_EOL);
$this->output('No work - /packages.json hash unchanged' . PHP_EOL);
return true;
}

Expand Down Expand Up @@ -530,7 +544,7 @@ public function sync()

$gzipped = gzencode($rootData, 8);
$this->write('/packages.json', $rootData, $gzipped, strtotime($rootResp->getHeaders()['last-modified'][0]));
$this->output('X');
$this->output(' X(P) ');
$this->statsdIncrement('mirror.sync_root');

$this->output(PHP_EOL);
Expand All @@ -551,6 +565,7 @@ public function gc()

$rootFile = $this->target.'/packages.json.gz';
if (!file_exists($rootFile)) {
$this->output($rootFile.' doesn\'t exists' . PHP_EOL);
return;
}
$rootJson = json_decode(gzdecode(file_get_contents($rootFile)), true);
Expand Down