From d4de081c8f92bd4e8f00a83dc60977c2ef4527ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20=C5=BB=C3=B3=C5=82tak?= Date: Mon, 7 Dec 2020 11:19:03 +0100 Subject: [PATCH] Make it pass almost all CLARIN FCS endpoint tests. SearchRetrieve paging still requires implementing --- src/acdhOeaw/arche/fcs/Endpoint.php | 82 +++++++++++++++++------- src/acdhOeaw/arche/fcs/SruException.php | 18 ++++-- src/acdhOeaw/arche/fcs/SruParameters.php | 30 +++++++-- src/acdhOeaw/arche/fcs/SruResponse.php | 46 +++++++++---- 4 files changed, 129 insertions(+), 47 deletions(-) diff --git a/src/acdhOeaw/arche/fcs/Endpoint.php b/src/acdhOeaw/arche/fcs/Endpoint.php index 00d0b85..bb847b6 100644 --- a/src/acdhOeaw/arche/fcs/Endpoint.php +++ b/src/acdhOeaw/arche/fcs/Endpoint.php @@ -30,6 +30,7 @@ use DOMNode; use PDO; use acdhOeaw\cql\Parser; +use acdhOeaw\cql\ParserException; /** * Description of Endpoint @@ -68,7 +69,7 @@ public function __construct(object $cfg) { } public function handleRequest(): void { - $resp = new SruResponse('explain', '1.2'); + $resp = new SruResponse('explain', $this->cfg->defaultVersion ?? SruResponse::SRU_MAX_VERSION); try { switch ($_SERVER['REQUEST_METHOD'] ?? '') { case 'GET': @@ -85,6 +86,7 @@ public function handleRequest(): void { return; } $param = new SruParameters($src, $this->cfg->defaultVersion ?? SruResponse::SRU_MAX_VERSION); + $resp = new SruResponse($param->operation, $this->cfg->defaultVersion ?? SruResponse::SRU_MAX_VERSION); switch ($param->operation) { case 'explain': @@ -104,7 +106,6 @@ public function handleRequest(): void { header('Content-Type: application/xml'); echo (string) $resp; } catch (SruException $e) { - $resp->addRecord(null, ''); $resp->addDiagnostics($e); header('Content-Type: application/xml'); echo (string) $resp; @@ -156,7 +157,7 @@ private function handleExplain(SruParameters $param): SruResponse { if ($param->xFcsEndpointDescription === 'true') { $ed = $resp->createElementNs(self::NMSP_FCS_ENDPOINT_DESC, 'ed:EndpointDescription'); - $ed->setAttribute('version', '2'); + $ed->setAttribute('version', $param->version >= 2 ? '2' : '1'); $cpbs = $ed->appendChild($resp->createElementNs(self::NMSP_FCS_ENDPOINT_DESC, 'ed:Capabilities')); $cpbs->appendChild($resp->createElementNs(self::NMSP_FCS_ENDPOINT_DESC, 'ed:Capability', self::CPBLT_BASIC_SEARCH)); $sdvs = $ed->appendChild($resp->createElementNs(self::NMSP_FCS_ENDPOINT_DESC, 'ed:SupportedDataViews')); @@ -178,7 +179,9 @@ private function handleExplain(SruParameters $param): SruResponse { private function handleSearch(SruParameters $param): SruResponse { $this->checkParam($param, 'search'); - $resp = new SruResponse('searchRetrieve', $param->version); + $param->maximumRecords = (int) $param->maximumRecords; + $param->startRecord = (int) $param->startRecord; + $resp = new SruResponse('searchRetrieve', $param->version); $pdo = $this->getDbHandle(); $query = $this->cfg->resourceQuery->query; @@ -191,8 +194,12 @@ private function handleSearch(SruParameters $param): SruResponse { $query->execute($this->cfg->resourceQuery->parameters); $this->processFcsContext($param, $pdo); - $cqlParser = new Parser($param->query); - $tsquery = $cqlParser->asTsquery(); + try { + $cqlParser = new Parser($param->query); + } catch (ParserException $e) { + throw new SruException('', 10); + } + $tsquery = $cqlParser->asTsquery(); $hglghOpts = sprintf( 'MaxWords=%d,MinWords=%d,ShortWord=%d,MaxFragments=%d,FragmentDelimiter=%s', $this->cfg->highlighting->maxWords, @@ -215,24 +222,30 @@ private function handleSearch(SruParameters $param): SruResponse { $queryParam = [$tsquery, $hglghOpts, self::FTS_PROPERTY_BINARY, $tsquery]; $query = $pdo->prepare($query); $query->execute($queryParam); - while ($res = $query->fetchObject()) { - $xmlRes = $resp->createElementNs(self::NMSP_FCS_RESOURCE, 'fcs:Resource'); + $n = 0; + while (($res = $query->fetchObject()) && $n < $param->maximumRecords) { + $xmlRes = $resp->createElementNs(self::NMSP_FCS_RESOURCE, 'fcs:Resource'); $xmlRes->setAttribute('pid', $res->pid); - + $hits = explode(self::FRAGMENT_DELIMITER, $res->hits); foreach ($hits as $hit) { - $xmlResFrag = $xmlRes->appendChild($resp->createElementNs(self::NMSP_FCS_RESOURCE, 'fcs:ResourceFragment')); + $xmlResFrag = $xmlRes->appendChild($resp->createElementNs(self::NMSP_FCS_RESOURCE, 'fcs:ResourceFragment')); $xmlHitDataView = $xmlResFrag->appendChild($resp->createElementNs(self::NMSP_FCS_RESOURCE, 'fcs:DataView')); $xmlHitDataView->setAttribute('type', self::MIME_FCS_HITS); - $xmlHit = $xmlHitDataView->appendChild($resp->createElementNs(self::NMSP_FCS_HITS, 'hits:Result')); - $offset = 0; - while ($p1 = strpos($hit, '', $offset)) { + $xmlHit = $xmlHitDataView->appendChild($resp->createElementNs(self::NMSP_FCS_HITS, 'hits:Result')); + $offset = 0; + while ($p1 = strpos($hit, '', $offset)) { $xmlHit->appendChild($xmlHit->ownerDocument->createTextNode(substr($hit, $offset, $p1))); $p2 = strpos($hit, '', $offset + 3); $xmlHit->appendChild($resp->createElementNs(self::NMSP_FCS_HITS, 'hits:hit', substr($hit, $p1 + 3, $p2 - $p1 - 3))); $offset = $p2 + 4; } $xmlHit->appendChild($xmlHit->ownerDocument->createTextNode(substr($hit, $offset))); + + $n++; + if ($n >= $param->maximumRecords) { + break; + } } foreach ($param->xFcsDataviews as $dv) { @@ -241,7 +254,7 @@ private function handleSearch(SruParameters $param): SruResponse { $resp->addRecord($xmlRes, self::NMSP_FCS_RESOURCE); } - + return $resp; } @@ -250,8 +263,6 @@ private function handleScan(SruParameters $param): SruResponse { $resp = new SruResponse('scan', $param->version); throw new SruException('', 4); - - return $resp; } private function explainDescribeResources(DOMNode $container, @@ -280,31 +291,58 @@ private function checkParam(SruParameters $param, string $operation): void { throw new SruException(SruResponse::SRU_MAX_VERSION, 5); } if ($param->renderedBy !== 'client') { - throw new SruException('RenderedBy', 6); + throw new SruException('renderedBy', 6); } - if ($param->recordXMLEscaping !== 'XML') { + if ($param->recordXMLEscaping !== 'xml') { throw new SruException('', 71); } - if (!empty($param->sortKeys)) { - throw new SruException('', 80); - } if ($param->httpAccept !== 'application/sru+xml') { // it makes no sense to check it as it will e.g. // make it unable to test the endpoint in a browser as browsers set Accept text/html //throw new FcsException('Not Acceptable', 406); } if ($operation === 'search') { - if ($param->query === null) { + if (empty($param->query)) { throw new SruException('query', 7); } if ($param->queryType !== 'cql' && $this->queryType !== 'searchTerms') { throw new SruException('queryType', 6); } + if ($param->startRecord !== '' && !preg_match('/^[1-9][0-9]*$/', $param->startRecord)) { + throw new SruException('startRecord', 6); + } + if ((string) $param->maximumRecords !== '' && !preg_match('/^[1-9][0-9]*$/', $param->maximumRecords)) { + throw new SruException('maximumRecords', 6); + } + if ((string) $param->resultSetTTL !== '') { + throw new SruException('resultSetTTL', 8); + } + + if (!empty($param->recordSchema) && $param->recordSchema !== self::NMSP_FCS_RESOURCE) { + throw new SruException($param->recordSchema, 66); + } + if (!empty($param->recordPacking)) { + if ($param->version >= 2 && $param->recordPacking !== 'packed') { + throw new SruException('recordPacking', 6); + }else if ($param->version < 2 && $param->recordPacking !== 'xml') { + throw new SruException('', 71); + } + } foreach ($param->xFcsDataviews as $i) { if ($i !== self::ID_DATA_VIEW_CMDI && !(empty(trim($i) || count($param->xFcsDataviews) > 1))) { throw new SruException('', 4); } } + } elseif ($operation === 'scan') { + if (empty($param->scanClause)) { + throw new SruException('scanClause', 7); + } + if ((string) $param->responsePosition !== '' && !preg_match('/^[1-9][0-9]*$/', $param->responsePosition)) { + throw new SruException('responsePosition', 6); + } + if ((string) $param->maximumTerms !== '' && !preg_match('/^[1-9][0-9]*$/', $param->maximumTerms)) { + throw new SruException('maximumTerms', 6); + } } } diff --git a/src/acdhOeaw/arche/fcs/SruException.php b/src/acdhOeaw/arche/fcs/SruException.php index e5cb3b8..32be9cd 100644 --- a/src/acdhOeaw/arche/fcs/SruException.php +++ b/src/acdhOeaw/arche/fcs/SruException.php @@ -34,6 +34,7 @@ * @author zozlak */ class SruException extends FcsException { + /** * List of all SRU diagnostic codes. * @@ -102,7 +103,7 @@ class SruException extends FcsException { 68 => 'Not authorized to send record', 69 => 'Not authorized to send record in this schema', 70 => 'Record too large to send', // Maximum record size - 71 => 'Unsupported recordXMLEscaping value', + 71 => 'Unsupported recordXMLEscaping/recordPacking value', 72 => 'XPath retrieval unsupported', 73 => 'XPath expression contains unsupported feature', // Feature 74 => 'Unable to evaluate XPath expression', @@ -130,13 +131,16 @@ class SruException extends FcsException { 235 => 'Database does not exist', ]; - public function appendToXmlNode(DOMNode $node): void { - $d = $node->ownerDocument->createElementNS(SruResponse::DIAGNOSTICS_NMSP, 'diag:diagnostic'); - $uri = $node->ownerDocument->createElementNS(SruResponse::DIAGNOSTICS_NMSP, 'diag:uri', 'info:srw/diagnostic/1/' . $this->getCode()); - $details = $node->ownerDocument->createElementNS(SruResponse::DIAGNOSTICS_NMSP, 'diag:details', $this->getMessage()); - $message = $node->ownerDocument->createElementNS(SruResponse::DIAGNOSTICS_NMSP, 'diag:message', self::$exceptions[$this->getCode()]); + public function appendToXmlNode(DOMNode $node, string $namespace): void { + $d = $node->ownerDocument->createElementNS($namespace, 'diag:diagnostic'); + $uri = $node->ownerDocument->createElementNS($namespace, 'diag:uri', 'info:srw/diagnostic/1/' . $this->getCode()); $d->appendChild($uri); - $d->appendChild($details); + $details = $this->getMessage(); + if (!empty($details)) { + $details = $node->ownerDocument->createElementNS($namespace, 'diag:details', $details); + $d->appendChild($details); + } + $message = $node->ownerDocument->createElementNS($namespace, 'diag:message', self::$exceptions[$this->getCode()]); $d->appendChild($message); $node->appendChild($d); } diff --git a/src/acdhOeaw/arche/fcs/SruParameters.php b/src/acdhOeaw/arche/fcs/SruParameters.php index 1bd2566..b4e91ae 100644 --- a/src/acdhOeaw/arche/fcs/SruParameters.php +++ b/src/acdhOeaw/arche/fcs/SruParameters.php @@ -56,24 +56,32 @@ class SruParameters { public $xFcsEndpointDescription; public $xFcsContext; public $xFcsDataViews; + public $scanClause; + public $responsePosition; + public $maximumTerms; public function __construct(array $src, string $defaultVersion) { - $this->operation = $src['operation'] ?? 'explain'; - $this->version = $src['version'] ?? $defaultVersion; $this->query = $src['query'] ?? null; - $this->startRecord = $src['startRecord'] ?? 1; // startPosition - $this->maximumRecords = $src['maximumRecords'] ?? null; // maximumItems - $this->recordXMLEscaping = $src['recordXMLEscaping'] ?? 'XML'; + $this->operation = $src['operation'] ?? ''; + $this->version = $src['version'] ?? $defaultVersion; + $this->recordXMLEscaping = $src['recordXMLEscaping'] ?? 'xml'; $this->recordSchema = $src['recordSchema'] ?? null; // responseItemType $this->resultSetTTL = $src['resultSetTTL'] ?? null; $this->stylesheet = $src['Stylesheet'] ?? null; + $this->recordPacking = $src['recordPacking'] ?? ($this->version >= 2 ? 'packed' : 'xml'); + // searchRetrieve-specific + $this->startRecord = $src['startRecord'] ?? '1'; // startPosition + $this->maximumRecords = $src['maximumRecords'] ?? null; // maximumItems + // scan-specific + $this->scanClause = $src['scanClause'] ?? null; + $this->responsePosition = $src['responsePosition'] ?? null; + $this->maximumTerms = $src['maximumTerms'] ?? null; // SRU 2.0 $this->queryType = $src['queryType'] ?? 'cql'; $this->sortKeys = $src['sortKeys'] ?? null; // sortOrder $this->renderedBy = $src['RenderedBy'] ?? 'client'; $this->httpAccept = $src['httpAccept'] ?? ($_SERVER['HTTP_ACCEPT'] ?? 'application/sru+xml'); // responseFormat $this->responseType = $src['responseType'] ?? null; - $this->recordPacking = $src['recordPacking'] ?? 'packed'; $this->facetSort = $src['facetSort'] ?? null; $this->facetStart = $src['facetStart'] ?? null; $this->facetLimit = $src['facetLimit'] ?? null; @@ -83,6 +91,16 @@ public function __construct(array $src, string $defaultVersion) { $this->xFcsEndpointDescription = $src['x-fcs-endpoint-description'] ?? false; $this->xFcsContext = explode(',', $src['x-fcs-context'] ?? ''); // SRU error 1 if not exists $this->xFcsDataviews = explode(',', $src['x-fcs-dataviews'] ?? ''); // SRU error 4 if not exists + // default operation handling + if ($this->operation === '') { + if (!empty($this->query)) { + $this->operation = 'searchRetrieve'; + } elseif (!empty($this->scanClause)) { + $this->operation = 'scan'; + } else { + $this->operation = 'explain'; + } + } } } diff --git a/src/acdhOeaw/arche/fcs/SruResponse.php b/src/acdhOeaw/arche/fcs/SruResponse.php index 89c0cfe..5331c29 100644 --- a/src/acdhOeaw/arche/fcs/SruResponse.php +++ b/src/acdhOeaw/arche/fcs/SruResponse.php @@ -37,25 +37,39 @@ */ class SruResponse { - const ZEEREX_NMSP = 'http://explain.z3950.org/dtd/2.1/'; - const DIAGNOSTICS_NMSP = 'http://www.loc.gov/zing/srw/diagnostic/'; - const RECORD_SCHEMA = 'http://explain.z3950.org/dtd/2.1/'; - const SRU_MAX_VERSION = '2.0'; - const SRU_NMSP_1 = 'http://www.loc.gov/zing/srw/'; - const SRU_NMSP_2 = 'http://docs.oasis-open.org/ns/search-ws/sruResponse'; + const ZEEREX_NMSP = 'http://explain.z3950.org/dtd/2.1/'; + const DIAGNOSTICS_NMSP_1 = 'http://www.loc.gov/zing/srw/diagnostic/'; + const DIAGNOSTICS_NMSP_2 = 'http://docs.oasis-open.org/ns/search-ws/diagnostic'; + const RECORD_SCHEMA = 'http://explain.z3950.org/dtd/2.1/'; + const SRU_MAX_VERSION = '2.0'; + const SRU_NMSP_1 = 'http://www.loc.gov/zing/srw/'; + const SRU_NMSP_2 = 'http://docs.oasis-open.org/ns/search-ws/sruResponse'; + const SRU_NMSP_SCAN_2 = 'http://docs.oasis-open.org/ns/search-ws/scan'; private $version; private $nmsp; private $doc; private $root; + private $recordRoot; + private $numberOfRecords; public function __construct(string $responseType, string $version) { $this->version = (float) $version; $this->nmsp = $this->version >= 2 ? self::SRU_NMSP_2 : self::SRU_NMSP_1; - $this->doc = new DOMDocument('1.0', 'utf-8'); - $this->root = $this->doc->createElementNS($this->nmsp, "sru:{$responseType}Response"); + if ($this->version >= 2 && $responseType === 'scan') { + $this->nmsp = self::SRU_NMSP_SCAN_2; + } + $this->doc = new DOMDocument('1.0', 'utf-8'); + $this->root = $this->doc->createElementNS($this->nmsp, "sru:{$responseType}Response"); $this->doc->appendChild($this->root); $this->root->appendChild($this->doc->createElementNS($this->nmsp, 'sru:version', sprintf('%.1f', $this->version))); + $this->recordRoot = $this->root; + + if ($responseType === 'searchRetrieve') { + //TODO sru:resourceCountPrecision only in SRU 2.0 + $this->numberOfRecords = 0; + $this->recordRoot = $this->root->appendChild($this->doc->createElementNS($this->nmsp, 'sru:records')); + } } public function createElementNs(string $ns, string $el, @@ -65,20 +79,20 @@ public function createElementNs(string $ns, string $el, public function addDiagnostics(SruException $e): void { $d = $this->root->appendChild($this->createElementNs($this->nmsp, 'sru:diagnostics')); - $e->appendToXmlNode($d); + $e->appendToXmlNode($d, $this->version >= 2 ? self::DIAGNOSTICS_NMSP_2 : self::DIAGNOSTICS_NMSP_1); } public function addRecord(?DOMNode $content, string $schema, ?string $id = null, ?int $position = null): void { - $rec = $this->root->appendChild($this->doc->createElementNS($this->nmsp, 'sru:record')); + $rec = $this->recordRoot->appendChild($this->doc->createElementNS($this->nmsp, 'sru:record')); if ($content === null) { return; } $rec->appendChild($this->doc->createElementNS($this->nmsp, 'sru:recordSchema', $schema)); if ($this->version >= 2) { - $rec->appendChild($this->doc->createElementNS($this->nmsp, 'sru:recordXMLEscaping', 'XML')); + $rec->appendChild($this->doc->createElementNS($this->nmsp, 'sru:recordXMLEscaping', 'xml')); } else { - $rec->appendChild($this->doc->createElementNS($this->nmsp, 'sru:recordPacking', 'XML')); + $rec->appendChild($this->doc->createElementNS($this->nmsp, 'sru:recordPacking', 'xml')); } if (!empty($id)) { $rec->appendChild($this->doc->createElementNS($this->nmsp, 'sru:recordIdentifier', $id)); @@ -89,6 +103,8 @@ public function addRecord(?DOMNode $content, string $schema, $d = $this->doc->createElementNS($this->nmsp, 'sru:recordData'); $d->appendChild($content); $rec->appendChild($d); + + $this->numberOfRecords++; } public function addExtraResponseData(DOMNode $extra): void { @@ -97,6 +113,12 @@ public function addExtraResponseData(DOMNode $extra): void { } public function __toString(): string { + if ($this->root !== $this->recordRoot) { + $this->root->insertBefore($this->doc->createElementNS($this->nmsp, 'sru:numberOfRecords', $this->numberOfRecords), $this->root->firstChild->nextSibling); + if ($this->numberOfRecords === 0) { + $this->root->removeChild($this->recordRoot); + } + } return $this->doc->saveXML(); }