|
| 1 | +<?php |
| 2 | + |
| 3 | +declare(strict_types=1); |
| 4 | + |
| 5 | +namespace Tests\Keboola\Db\ImportExport; |
| 6 | + |
| 7 | +use Keboola\Csv\CsvOptions; |
| 8 | +use Keboola\Db\ImportExport\ImportOptions; |
| 9 | +use Keboola\Db\ImportExport\Storage; |
| 10 | +use Keboola\Temp\Temp; |
| 11 | +use MicrosoftAzure\Storage\Common\Internal\Resources; |
| 12 | +use Symfony\Component\Stopwatch\Stopwatch; |
| 13 | + |
/**
 * Memory probe for generating Snowflake copy commands from a very large ABS manifest.
 *
 * The script writes a manifest with SLICES_TOTAL entries into a temp folder, uploads it
 * to the configured container, iterates the copy commands produced by
 * Storage\ABS\SnowflakeImportAdapter and prints the memory reported by the stopwatch
 * for each phase.
 */
class HugeManifest
{
    // NOTE(review): getCredentialsForAzureContainer() is not defined in this class;
    // presumably it is provided by this trait - confirm.
    use ABSSourceTrait;

    private const MANIFEST_FILE_NAME = 'hugeManifest.json';
    private const SLICES_TOTAL = 500 * 1000;

    /**
     * Lazily resolved local path of the manifest, see getManifestFileName().
     *
     * @var string|null
     */
    private $manifestFile;

    /**
     * @var AbsLoader
     */
    private $loader;

    /**
     * @var Stopwatch
     */
    private $stopwatch;

    /**
     * @var Temp
     */
    private $temp;

    /**
     * @var string
     */
    private $containerName;

    /**
     * @var string
     */
    private $accountName;

    /**
     * @param string $accountName   storage account name, used for the loader and the slice URLs
     * @param string $containerName container the manifest is uploaded to
     */
    public function __construct(
        string $accountName,
        string $containerName
    ) {
        $this->accountName = $accountName;
        $this->containerName = $containerName;
        $this->loader = new AbsLoader($accountName, $containerName);
    }

    /**
     * Runs the whole scenario: recreate the container, upload the manifest,
     * iterate the copy commands, delete the container. Memory figures taken
     * from the stopwatch events are echoed after each phase.
     */
    public function run(): void
    {
        $this->temp = new Temp();
        $this->temp->initRunFolder();

        $this->stopwatch = new Stopwatch();
        $this->stopwatch->start('test');
        $this->loader->deleteContainer();
        $this->loader->createContainer();

        $this->stopwatch->start('upload');
        $this->uploadManifestAndSlices();
        $event = $this->stopwatch->stop('upload');
        // Formatted like the other two reports below instead of printing raw bytes.
        echo 'max memory upload: ' . $this->getMemoryForHuman($event->getMemory()) . PHP_EOL;

        $this->stopwatch->start('commands');
        $this->generateCommands();
        $event = $this->stopwatch->stop('commands');
        echo 'max memory commands: ' . $this->getMemoryForHuman($event->getMemory()) . PHP_EOL;

        $this->loader->deleteContainer();
        $event = $this->stopwatch->stop('test');
        echo 'max memory: ' . $this->getMemoryForHuman($event->getMemory()) . PHP_EOL;
    }

    /**
     * Writes the manifest file locally and uploads it as a block blob.
     *
     * Despite the name, no slice blobs are uploaded here; only the manifest
     * that references SLICES_TOTAL slice URLs is created.
     *
     * @throws \RuntimeException when the local manifest cannot be written or read back
     */
    private function uploadManifestAndSlices(): void
    {
        $this->printMemory();
        $manifest = $this->openManifestFile();

        echo 'Generating manifest' . PHP_EOL;
        // Exactly SLICES_TOTAL entries (0 .. SLICES_TOTAL - 1); the last one has no trailing comma.
        $lastIndex = self::SLICES_TOTAL - 1;
        for ($i = 0; $i < self::SLICES_TOTAL; $i++) {
            $sliceName = sprintf('my_awesome_long_name_slice.csv_%d', $i);
            fwrite($manifest, sprintf(
                '{"url":"%s"}%s' . PHP_EOL,
                $this->getAbsUrl($sliceName),
                $i === $lastIndex ? '' : ','
            ));
        }

        $this->closeManifestFile($manifest);

        echo PHP_EOL;

        echo 'Uploading manifest' . PHP_EOL;

        $manifestPath = $this->getManifestFileName();
        $content = file_get_contents($manifestPath);
        if ($content === false) {
            throw new \RuntimeException(sprintf('Cannot read manifest file "%s".', $manifestPath));
        }

        $this->loader->getBlobService()->createBlockBlob(
            $this->containerName,
            self::MANIFEST_FILE_NAME,
            $content
        );

        echo sprintf('Manifest file size: %s bytes', filesize($manifestPath)) . PHP_EOL;
        $this->printMemory();
    }

    /**
     * Echoes the current memory_get_usage(true) value in human readable form.
     */
    private function printMemory(): void
    {
        echo $this->getMemoryForHuman(memory_get_usage(true));

        echo PHP_EOL;
    }

    /**
     * Formats a byte count as bytes, kilobytes or megabytes (rounded to 2 decimals).
     *
     * @param int $memUsage size in bytes
     * @return string value followed by its unit, e.g. "1.5 kilobytes"
     */
    private function getMemoryForHuman(int $memUsage): string
    {
        if ($memUsage < 1024) {
            return $memUsage . ' bytes';
        }

        if ($memUsage < 1048576) {
            return round($memUsage / 1024, 2) . ' kilobytes';
        }

        return round($memUsage / 1048576, 2) . ' megabytes';
    }

    /**
     * Creates (or truncates) the manifest file with the JSON prologue and
     * reopens it in append mode for the entries.
     *
     * @return resource
     * @throws \RuntimeException when the file cannot be created or opened
     */
    private function openManifestFile()
    {
        $manifestPath = $this->getManifestFileName();

        if (file_put_contents($manifestPath, '{"entries":[' . PHP_EOL) === false) {
            throw new \RuntimeException(sprintf('Cannot create manifest file "%s".', $manifestPath));
        }

        $handle = fopen($manifestPath, 'a');
        if ($handle === false) {
            throw new \RuntimeException(sprintf('Cannot open manifest file "%s" for appending.', $manifestPath));
        }

        return $handle;
    }

    /**
     * Returns the local manifest path inside the temp folder, resolving it on first use.
     * Requires $this->temp, which is set in run().
     */
    private function getManifestFileName(): string
    {
        if ($this->manifestFile === null) {
            $this->manifestFile = $this->temp->getTmpFolder() . '/' . self::MANIFEST_FILE_NAME;
        }

        return $this->manifestFile;
    }

    /**
     * Builds the azure:// URL of a blob in the configured account and container.
     *
     * @param string $fileName blob name within the container
     */
    private function getAbsUrl(string $fileName): string
    {
        return sprintf(
            'azure://%s.%s/%s/%s',
            $this->accountName,
            Resources::BLOB_BASE_DNS_NAME,
            $this->containerName,
            $fileName
        );
    }

    /**
     * Writes the JSON epilogue and closes the manifest handle.
     *
     * @param resource $resource handle returned by openManifestFile()
     */
    private function closeManifestFile($resource): void
    {
        fwrite($resource, ']}');
        fclose($resource);
    }

    /**
     * Iterates every copy command the adapter yields for the uploaded manifest,
     * recording a stopwatch lap per command. The commands themselves are discarded.
     */
    private function generateCommands(): void
    {
        $source = new Storage\ABS\SourceFile(
            $this->containerName,
            self::MANIFEST_FILE_NAME,
            $this->getCredentialsForAzureContainer($this->containerName),
            $this->accountName,
            new CsvOptions(),
            true
        );
        $destination = new Storage\Snowflake\Table('schema', 'table');
        $options = new ImportOptions();
        $adapter = new Storage\ABS\SnowflakeImportAdapter($source);
        echo 'Generating commands' . PHP_EOL;

        $commands = $adapter->getCopyCommands($destination, $options, 'stagingTable');
        // $cmd is intentionally unused: the loop only drives the iteration.
        foreach ($commands as $cmd) {
            $this->stopwatch->lap('commands');
        }
    }
}
0 commit comments