Skip to content

Commit

Permalink
/download endpoint implemented
Browse files Browse the repository at this point in the history
The /download endpoint allows a batch download of multiple resources in
a zip file.

Also, the content-disposition response header for binaries uses the
*=UTF-8''{fileName} syntax now to clearly indicate the file name
encoding.
  • Loading branch information
zozlak committed Nov 12, 2024
1 parent 51d63d5 commit 737e25b
Show file tree
Hide file tree
Showing 11 changed files with 464 additions and 13 deletions.
4 changes: 3 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
"zozlak/http-accept": ">=0.1.0 <1",
"zozlak/logging": "^1",
"zozlak/rdf-constants": "^1",
"php-amqplib/php-amqplib": "^3.1"
"php-amqplib/php-amqplib": "^3.1",
"maennchen/zipstream-php": "^3.1"
},
"autoload": {
"psr-4": {
Expand All @@ -29,6 +30,7 @@
}
},
"require-dev": {
"ext-zip": "*",
"phpunit/phpunit": "*",
"zozlak/yaml-merge": "^1",
"phpstan/phpstan": "*"
Expand Down
6 changes: 6 additions & 0 deletions config-sample.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,12 @@ rest:
withReferences: X-WITH-REFERENCES
resourceProperties: X-RESOURCE-PROPERTIES
relativesProperties: X-RELATIVES-PROPERTIES
download:
# store or deflate
compressionMethod: store
compressionLevel: ~
fileName: data.zip
strict: false
schema:
id: https://vocabs.acdh.oeaw.ac.at/schema#hasIdentifier
label: https://vocabs.acdh.oeaw.ac.at/schema#hasTitle
Expand Down
9 changes: 6 additions & 3 deletions src/acdhOeaw/arche/core/Auth.php
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ public function checkCreateRights(): void {
}

public function checkAccessRights(int $resId, string $privilege,
bool $metadataRead): void {
bool $metadataRead, bool $deny = true): void {
$c = RC::$config->accessControl;
if ($metadataRead && !$c->enforceOnMetadata || $this->isAdmin) {
return;
Expand All @@ -115,7 +115,11 @@ public function checkAccessRights(int $resId, string $privilege,
$allowed = json_decode($allowed) ?? [];
$default = $c->defaultAction->$privilege ?? self::DEFAULT_DENY;
if (count(array_intersect($this->userRoles, $allowed)) === 0 && $default !== self::DEFAULT_ALLOW) {
$this->denyAccess($allowed);
if ($deny) {
$this->denyAccess($allowed);
} else {
throw new RepoException('Unauthorized', $this->isPublic() ? 401 : 403);
}
}
}

Expand Down Expand Up @@ -226,7 +230,6 @@ public function denyAccess(array $allowed): void {
throw new RepoException((string) $resp->getBody(), $resp->getStatusCode(), headers: $headers);
}
}
RC::$log->alert("FOO! " . implode(',', $cookieHeader));
throw new RepoException('Forbidden', 403, headers: $cookieHeader);
}

Expand Down
4 changes: 2 additions & 2 deletions src/acdhOeaw/arche/core/BinaryPayload.php
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ public function getHeaders(): array {
throw new NoBinaryException();
}
if (!empty($data->filename)) {
$headers['Content-Disposition'] = 'attachment; filename="' . $data->filename . '"';
$headers['Content-Disposition'] = "attachment; filename*=UTF-8''" . rawurlencode($data->filename);
}
if (!empty($data->mime)) {
$headers['Content-Type'] = $data->mime;
Expand Down Expand Up @@ -278,7 +278,7 @@ private function getRequestMetadataRaw(): array {

$fileName = null;
if (preg_match('/^attachment; filename=/', $contentDisposition)) {
$fileName = (string) preg_replace('/^attachment; filename="?/', '', $contentDisposition);
$fileName = (string) preg_replace('/^attachment; filename(=|[*]=.*\'.*\')"?/', '', $contentDisposition);
$fileName = (string) preg_replace('/"$/', '', $fileName);
RC::$log->debug("\t\tfile name: $fileName");
}
Expand Down
215 changes: 215 additions & 0 deletions src/acdhOeaw/arche/core/Download.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
<?php

/*
* The MIT License
*
* Copyright 2024 zozlak.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

namespace acdhOeaw\arche\core;

use PDO;
use PDOStatement;
use DateTimeImmutable;
use ZipStream\ZipStream;
use acdhOeaw\arche\core\RestController as RC;
use ZipStream\CompressionMethod;

/**
* Class handling the batch download endpoint
*
* @author zozlak
*/
class Download {

    const DEFAULT_COMPRESSION_METHOD       = CompressionMethod::STORE;
    const DEFAULT_COMPRESSION_LEVEL        = 1;
    const DEFAULT_STRICT                   = false;
    const DEFAULT_FILE_NAME                = 'data.zip';
    const FORBIDDEN_FILENAME_CHARS_REGEX   = '/[^-_[:alnum:] ]/u';
    const FORBIDDEN_FILENAME_CHARS_REPLACE = '_';

    /**
     * Cache of already resolved ancestors, keyed by the resource id (as a string).
     * Each entry stores the sanitized directory name ('filename') and the id
     * of the next ancestor ('parent', an empty string when there is none).
     *
     * @var array<string, array<string, string>>
     */
    private array $parents;

    /**
     * Lazily prepared statement fetching a resource together with its ancestors.
     */
    private PDOStatement $parentQuery;

    /**
     * Parameters of the $parentQuery; index 0 is overwritten on every execution.
     *
     * @var array<mixed>
     */
    private array $parentQueryParam;

    /**
     * Streams a zip archive with binaries of the requested resources and
     * everything returned for them by the get_relatives() child lookup.
     *
     * Request parameters:
     * - `ids` (GET or POST) a single id or an array of ids,
     * - `skipUnauthorized` (GET) JSON boolean; when true, resources failing
     *   the access check with 401/403 are left out instead of aborting,
     * - `strict` (GET) boolean overriding the `download.strict` config value;
     *   when true, both zip64 and zero-header are disabled.
     *
     * @throws RepoException when no ids are provided, when the access check
     *   fails (and is not skipped) or when no resource passes the access check
     */
    public function get(): void {
        $ids = $_GET['ids'] ?? $_POST['ids'] ?? [];
        if (!is_array($ids)) {
            $ids = [$ids];
        }
        if (count($ids) === 0) {
            throw new RepoException('No resources identifiers provided');
        }

        $allIds = $this->collectChildren($ids);
        unset($ids);

        $skip     = (bool) json_decode($_GET['skipUnauthorized'] ?? false); // so "false" is turned into false
        $validIds = $this->checkAccessRights($allIds, $skip);
        unset($allIds);
        if (count($validIds) === 0) {
            throw new RepoException("Unauthorized to download all requested resources", 403);
        }

        // create a zip
        $cfg      = RC::$config->download ?? null;
        // FILTER_VALIDATE_BOOLEAN maps "false", "0", "off", "no" and "" to false.
        // The previous strtoupper() call turned "false" into the truthy "FALSE".
        $strict   = filter_var($_GET['strict'] ?? $cfg->strict ?? self::DEFAULT_STRICT, FILTER_VALIDATE_BOOLEAN);
        $method   = match ($cfg->compressionMethod ?? '') {
            'store' => CompressionMethod::STORE,
            'deflate' => CompressionMethod::DEFLATE,
            default => self::DEFAULT_COMPRESSION_METHOD,
        };
        $level    = $cfg->compressionLevel ?? self::DEFAULT_COMPRESSION_LEVEL;
        $fileName = $cfg->fileName ?? self::DEFAULT_FILE_NAME;

        $metaQuery      = RC::$pdo->prepare("
SELECT m1.value AS filename, m2.value AS lastmod, m3.value_n AS filesize, r.target_id AS parent
FROM
metadata m1
JOIN metadata m2 USING (id)
JOIN metadata m3 USING (id)
LEFT JOIN relations r ON m1.id = r.id AND r.property = ?
WHERE
m1.id = ?
AND m1.property = ?
AND m2.property = ?
AND m3.property = ?
");
        $metaQueryParam = [
            RC::$schema->parent,
            null, // resource id, set on every iteration
            RC::$schema->fileName,
            RC::$schema->binaryModificationDate,
            RC::$schema->binarySize,
        ];

        // reset the per-request state
        $this->parents = [];
        unset($this->parentQuery);
        unset($this->parentQueryParam);

        $zip = new ZipStream(
            defaultCompressionMethod: $method,
            defaultDeflateLevel: $level,
            enableZip64: !$strict,
            defaultEnableZeroHeader: !$strict,
            outputName: $fileName
        );
        foreach ($validIds as $id) {
            $binary = new BinaryPayload($id);
            $path   = $binary->getPath();
            if (!file_exists($path)) {
                continue; // metadata-only resource
            }
            $metaQueryParam[1] = $id;
            $metaQuery->execute($metaQueryParam);
            $meta              = $metaQuery->fetchObject();
            if ($meta === false) {
                // The binary exists but the file name, modification date or size
                // metadata is missing. Skip it instead of failing mid-stream.
                RC::$log->debug("Skipping $id - incomplete binary metadata");
                continue;
            }
            $this->fetchParentsMeta($id, $meta);

            // prepend names of all the ancestors to get the in-archive path
            $filename = $meta->filename;
            $pid      = (string) $meta->parent;
            while (!empty($pid)) {
                $filename = $this->parents[$pid]['filename'] . '/' . $filename;
                $pid      = $this->parents[$pid]['parent'];
            }
            $zip->addFileFromPath(
                $filename,
                $path,
                lastModificationDateTime: new DateTimeImmutable($meta->lastmod),
                exactSize: (int) $meta->filesize
            );
        }
        $zip->finish();
    }

    /**
     * Emits the response code and the Allow header listing supported methods.
     *
     * @param int $code HTTP response code to be set
     */
    public function options(int $code = 204): void {
        http_response_code($code);
        header('Allow: OPTIONS, HEAD, GET, POST');
    }

    /**
     * Makes sure the ancestors chain of a given resource is stored
     * in the $this->parents cache.
     *
     * The database is queried only when the resource has a parent which
     * is not in the cache yet.
     *
     * @param int $id resource id
     * @param object $meta resource metadata row; only its `parent` property is read
     */
    private function fetchParentsMeta(int $id, object $meta): void {
        $this->parentQuery      ??= RC::$pdo->prepare("
SELECT r.id, n, COALESCE(m1.value, m2.value) AS filename
FROM
get_relatives(?, ?, 0, -999999, false, false) r
LEFT JOIN metadata m1 ON r.id = m1.id AND m1.property = ?
LEFT JOIN metadata m2 ON r.id = m2.id AND m2.property = ?
ORDER BY n DESC
");
        $this->parentQueryParam ??= [
            null, // resource id, set before every execution
            RC::$schema->parent,
            RC::$schema->fileName,
            RC::$schema->label,
        ];
        if ($meta->parent !== null && !isset($this->parents[$meta->parent])) {
            $this->parentQueryParam[0] = $id;
            $this->parentQuery->execute($this->parentQueryParam);
            $parentsMeta               = $this->parentQuery->fetchAll(PDO::FETCH_OBJ);
            for ($i = 0; $i < count($parentsMeta); $i++) {
                $pid = (string) $parentsMeta[$i]->id;
                // An already cached entry means the rest of the chain was cached
                // along with it, so there is nothing more to store.
                if (isset($this->parents[$pid])) {
                    break;
                }
                $this->parents[$pid] = [
                    'filename' => preg_replace(self::FORBIDDEN_FILENAME_CHARS_REGEX, self::FORBIDDEN_FILENAME_CHARS_REPLACE, $parentsMeta[$i]->filename),
                    // the following row is the direct ancestor (rows are ordered by n DESC)
                    'parent'   => (string) ($parentsMeta[$i + 1] ?? null)?->id,
                ];
            }
        }
    }

    /**
     * Collects ids returned by the get_relatives() database function
     * for each of the provided resource ids.
     *
     * @param array<string> $ids
     * @return array<string, string> collected ids stored as array keys
     *   (values are empty strings); empty when nothing was found
     */
    private function collectChildren(array $ids): array {
        $allIds = [];
        $query  = RC::$pdo->prepare("SELECT id FROM get_relatives(?, ?, 999999, 0, false, false)");
        $param  = [null, RC::$schema->parent];
        foreach ($ids as $id) {
            $param[0] = $id;
            $query->execute($param);
            while ($i = $query->fetchColumn()) {
                // using ids as keys deduplicates overlapping results
                $allIds[(string) $i] = '';
            }
        }
        return $allIds;
    }

    /**
     * Filters resources down to the ones the current user can read.
     *
     * @param array<string, string> $ids resource ids stored as array keys
     * @param bool $skipUnauthorized when true, resources failing the check
     *   with a 401 or 403 code are silently left out; otherwise the first
     *   failure is rethrown
     * @return array<int> ids which passed the access check
     * @throws RepoException
     */
    private function checkAccessRights(array $ids, bool $skipUnauthorized): array {
        $validIds = [];
        foreach (array_keys($ids) as $id) {
            try {
                RC::$log->debug("Testing $id");
                $id         = (int) $id;
                // NOTE(review): Auth::checkAccessRights() seems to accept a 4th
                // `$deny` argument (defaulting to true) selecting between
                // denyAccess() and a plain 401/403 RepoException - confirm
                // whether false should be passed here.
                RC::$auth->checkAccessRights($id, 'read', false);
                RC::$log->debug("    passed");
                $validIds[] = $id;
            } catch (RepoException $e) {
                if (!$skipUnauthorized || !in_array($e->getCode(), [401, 403])) {
                    throw $e;
                }
            }
        }
        return $validIds;
    }
}
7 changes: 7 additions & 0 deletions src/acdhOeaw/arche/core/RestController.php
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,13 @@ static public function handleRequest(): void {
} else {
$search->options(405);
}
} elseif ($path === 'download') {
$dwnld = new Download();
if ($method === 'Get' || $method === 'Post') {
$dwnld->get();
} else {
$dwnld->options($method === 'Options' ? 204 : 405);
}
} elseif (preg_match('>^([0-9]+/?)?(metadata|tombstone)?$>', $path)) {
$collection = $suffix = '';
$id = null;
Expand Down
Loading

0 comments on commit 737e25b

Please sign in to comment.