From 4984596f4e24d5b390786c36cbd01fed044bfc2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20=C5=BB=C3=B3=C5=82tak?= Date: Wed, 26 Jul 2023 22:06:12 +0200 Subject: [PATCH] Adjusted to rdfInterface v2 --- arche-ref-sources | 8 +- composer.json | 10 +-- .../arche/refSources/NamedEntityFile.php | 2 +- .../arche/refSources/NamedEntityInterface.php | 11 ++- .../refSources/NamedEntityIteratorFile.php | 13 ++- .../arche/refSources/NamedEntityRepo.php | 8 +- .../arche/refSources/NamedEntityTrait.php | 10 ++- .../arche/refSources/PropertyMapping.php | 85 ++++++++++--------- .../arche/refSources/PropertyMappings.php | 6 +- 9 files changed, 88 insertions(+), 65 deletions(-) diff --git a/arche-ref-sources b/arche-ref-sources index 7eacaef..6a3dcdf 100755 --- a/arche-ref-sources +++ b/arche-ref-sources @@ -28,9 +28,9 @@ use zozlak\argparse\ArgumentParser as AP; use quickRdf\DataFactory as DF; use quickRdf\Dataset; +use quickRdf\DatasetNode; use quickRdfIo\Util as ioUtil; use rdfInterface2easyRdf\AsRdfInterface; -use rdfHelpers\DatasetNode; use acdhOeaw\UriNormalizer; use acdhOeaw\UriNormRules; use acdhOeaw\UriNormalizerRule; @@ -68,8 +68,8 @@ $parser->addArgument('--id', help: 'process only resource with a given id'); $parser->addArgument('--extDbName', help: 'process only resource from a given external authority file'); $parser->addArgument('--repoUrl', help: 'use a given repository instance (overwrites the `repositoryUrl` property read from the config file)'); $parser->addArgument('--inputFile', help: 'read resources from a given RDF file instead of the ARCHE repository'); -$parser->addArgument('--user', help: 'user name used for repository authentication (not important if --test or --resolveOnly are used)'); -$parser->addArgument('--pswd', help: 'password used for repository authentication (not important if --test or --resolveOnly are used)'); +$parser->addArgument('--user', help: 'user name used for repository authentication (not important in mode "test", "parse" and "resolve")'); +$parser->addArgument('--pswd', help: 'password used for repository authentication (not important in mode "test", "parse" and "resolve")'); $parser->addArgument('--mode', default: 'parse', choices: array_keys($modes), help: "operation mode\n - resolve - only try to resolve the external URI (finds broken external URIs)\n - parse [default] - resolve the external URI and parse the output (when used with --verbose and/or --output it allows to inspect the data provided by the external source and test the metadata mapping defined in the configuration file)\n - test - tries to update the repository resource with data fetched from the external source (so doorkeeper checks are performed) but rolls back the update no matter if it was successful or not\n - update - updates the repository resource with data fetched from the external source\n"); $parser->addArgument('--verbose', action: AP::ACTION_STORE_TRUE, help: 'provide more verbose output, especially print the data fetched from the external reference source'); $parser->addArgument('--output', help: "when used, the data to be saved to the repository is also saved in a TTL file (the output isn't created in mode=resolve)"); @@ -177,7 +177,7 @@ foreach ($cfg->referenceSources as $extDbName => $extDbCfg) { // don't even try to resolve identifiers for which there's no mapping $mappings->matchExternalDatabase($id); echo " fetching data for $id\n"; - $meta = AsRdfInterface::addDatasetNode($normalizer->fetch($id), new DF(), fn($x) => new DatasetNode(new Dataset(), $x)); + $meta = AsRdfInterface::addDatasetNode($normalizer->fetch($id), new DF(), fn($x) => new DatasetNode($x)); $uriStr = $meta->getNode()->getValue(); $extDbName = $mappings->matchExternalDatabase($uriStr); if (!isset($entityExtMeta[$extDbName])) { diff --git a/composer.json b/composer.json index 61458dc..3612c35 100644 --- a/composer.json +++ b/composer.json @@ -20,9 +20,10 @@ "acdh-oeaw/uri-normalizer": "^2", "zozlak/argparse": "^1", "sweetrdf/quick-rdf-io": "^1", - "sweetrdf/quick-rdf": "^1", + "sweetrdf/quick-rdf": "^2.0.0-RC1", + "sweetrdf/rdf-interface": "^2.0.0-RC1", "sweetrdf/term-templates": "^1", - "sweetrdf/rdfinterface2easyrdf": "^0.2" + "sweetrdf/rdfinterface2easyrdf": "^0.3" }, "autoload": { "psr-4": { @@ -31,7 +32,7 @@ }, "require-dev": { "phpunit/phpunit": "^9", - "phpstan/phpstan": "^1" + "phpstan/phpstan": "*" }, "autoload-dev": { "psr-4": { @@ -40,6 +41,5 @@ }, "bin": [ "arche-ref-sources" - ], - "minimum-stability": "RC" + ] } diff --git a/src/acdhOeaw/arche/refSources/NamedEntityFile.php b/src/acdhOeaw/arche/refSources/NamedEntityFile.php index 28f60e6..7d8e04d 100644 --- a/src/acdhOeaw/arche/refSources/NamedEntityFile.php +++ b/src/acdhOeaw/arche/refSources/NamedEntityFile.php @@ -27,7 +27,7 @@ namespace acdhOeaw\arche\refSources; use quickRdf\DataFactory as DF; -use rdfHelpers\DatasetNode; +use quickRdf\DatasetNode; use termTemplates\QuadTemplate as QT; use acdhOeaw\UriNormalizer; use acdhOeaw\UriNormalizerException; diff --git a/src/acdhOeaw/arche/refSources/NamedEntityInterface.php b/src/acdhOeaw/arche/refSources/NamedEntityInterface.php index 9d3966f..9823e03 100644 --- a/src/acdhOeaw/arche/refSources/NamedEntityInterface.php +++ b/src/acdhOeaw/arche/refSources/NamedEntityInterface.php @@ -27,7 +27,7 @@ namespace acdhOeaw\arche\refSources; use acdhOeaw\UriNormalizer; -use rdfHelpers\DatasetNode; +use rdfInterface\DatasetNodeInterface; /** * @@ -44,7 +44,12 @@ public function getUri(): string; */ public function getIdentifiers(UriNormalizer $normalizer): array; - public function getMetadata(): DatasetNode; + public function getMetadata(): DatasetNodeInterface; - public function updateMetadata(DatasetNode $meta): array; + /** + * + * @param DatasetNodeInterface $meta + * @return array + */ + public function updateMetadata(DatasetNodeInterface $meta): array; } diff --git a/src/acdhOeaw/arche/refSources/NamedEntityIteratorFile.php b/src/acdhOeaw/arche/refSources/NamedEntityIteratorFile.php index a91d8b1..9deec90 100644 --- a/src/acdhOeaw/arche/refSources/NamedEntityIteratorFile.php +++ b/src/acdhOeaw/arche/refSources/NamedEntityIteratorFile.php @@ -27,10 +27,10 @@ namespace acdhOeaw\arche\refSources; use quickRdf\Dataset; +use quickRdf\DatasetNode; use quickRdf\DataFactory; use termTemplates\QuadTemplate as QT; use termTemplates\ValueTemplate as VT; -use rdfHelpers\DatasetNode; use quickRdfIo\Util as ioUtil; use zozlak\RdfConstants as RDF; use acdhOeaw\arche\lib\Schema; @@ -46,8 +46,17 @@ class NamedEntityIteratorFile implements NamedEntityIteratorInterface { private Dataset $graph; private Schema $schema; private Repo $repo; + + /** + * + * @var array<\termTemplates\QuadTemplate> + */ private array $filters = []; private ?int $limit = null; + /** + * + * @var array<\rdfInterface\TermInterface> + */ private array $matching; public function __construct(string $rdfFilePath, Repo $repo) { @@ -79,7 +88,7 @@ public function getNamedEntities(): \Generator { } foreach ($this->matching as $i) { $meta = $this->graph->copy(new QT($i)); - $meta = new DatasetNode($meta, $i); + $meta = DatasetNode::factory($i)->withDataset($meta); yield new NamedEntityFile($meta, $this, $this->repo); } } diff --git a/src/acdhOeaw/arche/refSources/NamedEntityRepo.php b/src/acdhOeaw/arche/refSources/NamedEntityRepo.php index 7e5728c..d88590f 100644 --- a/src/acdhOeaw/arche/refSources/NamedEntityRepo.php +++ b/src/acdhOeaw/arche/refSources/NamedEntityRepo.php @@ -26,13 +26,13 @@ namespace acdhOeaw\arche\refSources; -use quickRdf\Dataset; use quickRdf\DataFactory; -use rdfHelpers\DatasetNode; +use quickRdf\DatasetNode; use acdhOeaw\UriNormalizer; use acdhOeaw\UriNormalizerException; use acdhOeaw\arche\lib\Repo; use acdhOeaw\arche\lib\RepoResource; +use rdfInterface\DatasetNodeInterface; use rdfInterface2easyRdf\AsRdfInterface; /** @@ -52,11 +52,11 @@ public function __construct(RepoResource $res) { $this->repo = $res->getRepo(); } - public function getMetadata(): DatasetNode { + public function getMetadata(): DatasetNodeInterface { return AsRdfInterface::addDatasetNode( $this->res->getMetadata(), new DataFactory(), - fn($x) => new DatasetNode(new Dataset(), $x) + fn($x) => DatasetNode::factory($x) ); } diff --git a/src/acdhOeaw/arche/refSources/NamedEntityTrait.php b/src/acdhOeaw/arche/refSources/NamedEntityTrait.php index 40ed18f..3218ebf 100644 --- a/src/acdhOeaw/arche/refSources/NamedEntityTrait.php +++ b/src/acdhOeaw/arche/refSources/NamedEntityTrait.php @@ -27,8 +27,8 @@ namespace acdhOeaw\arche\refSources; use quickRdf\DataFactory; -use rdfHelpers\DatasetNode; use acdhOeaw\arche\lib\RepoResource; +use rdfInterface\DatasetNodeInterface; use rdfInterface2easyRdf\AsEasyRdf; use termTemplates\QuadTemplate as QT; use acdhOeaw\arche\lib\SearchTerm; @@ -45,7 +45,13 @@ trait NamedEntityTrait { private Repo $repo; - public function updateMetadata(DatasetNode $meta, bool $test = true): array { + /** + * + * @param DatasetNodeInterface $meta + * @param bool $test + * @return array + */ + public function updateMetadata(DatasetNodeInterface $meta, bool $test = true): array { if ($this->repo->inTransaction()) { $this->repo->rollback(); } diff --git a/src/acdhOeaw/arche/refSources/PropertyMapping.php b/src/acdhOeaw/arche/refSources/PropertyMapping.php index 44d300f..079201c 100644 --- a/src/acdhOeaw/arche/refSources/PropertyMapping.php +++ b/src/acdhOeaw/arche/refSources/PropertyMapping.php @@ -27,17 +27,16 @@ namespace acdhOeaw\arche\refSources; use RuntimeException; -use rdfInterface\LiteralInterface as iLiteral; -use rdfInterface\NamedNodeInterface as iNamedNode; -use rdfInterface\TermInterface as iTerm; -use rdfInterface\QuadInterface as iQuad; -use rdfInterface\DatasetInterface as iDataset; -use rdfInterface\DatasetListQuadPartsInterface as iDatasetLQP; -use rdfInterface\DatasetNodeInterface as iDatasetNode; +use rdfInterface\LiteralInterface; +use rdfInterface\NamedNodeInterface; +use rdfInterface\TermInterface; +use rdfInterface\QuadInterface; +use rdfInterface\DatasetInterface; +use rdfInterface\DatasetNodeInterface; use rdfInterface2easyRdf\AsRdfInterface; use quickRdf\Dataset; use quickRdf\DataFactory as DF; -use rdfHelpers\DatasetNode; +use quickRdf\DatasetNode; use termTemplates\QuadTemplate as QT; use termTemplates\NamedNodeTemplate; use termTemplates\ValueTemplate; @@ -62,7 +61,7 @@ class PropertyMapping { const TYPE_LITERAL = 'literal'; const TYPE_RESOURCE = 'resource'; - private iNamedNode $property; + private NamedNodeInterface $property; private string $type; private string $action; private string $langProcess; @@ -70,11 +69,11 @@ class PropertyMapping { private int $maxPerLang; private string $match; private string $skip; - private iTerm $value; + private TermInterface $value; /** * - * @var array + * @var array */ private array $path; private DF $termsFactory; @@ -96,7 +95,8 @@ public function __construct(object $cfg) { $this->termsFactory = new DF(); } - public function resolveAndMerge(iDatasetNode $meta, iDatasetNode $extDbMeta, + public function resolveAndMerge(DatasetNodeInterface $meta, + DatasetNodeInterface $extDbMeta, UriNormalizer $normalizer, bool $normalize): void { $extDbMeta = $this->resolve($extDbMeta, $normalizer, $normalize, $meta->getNode()); if (count($extDbMeta->getDataset()) === 0) { @@ -123,15 +123,16 @@ public function resolveAndMerge(iDatasetNode $meta, iDatasetNode $extDbMeta, /** * - * @param iDatasetNode $meta + * @param DatasetNodeInterface $meta * @param UriNormalizer $normalizer UriNormalizer object allowing for * recursive resolution of URIs. - * @param iTerm|null $subject optional triples subject to be enforeced on + * @param TermInterface|null $subject optional triples subject to be enforeced on * ther returned data - * @return DatasetNode + * @return DatasetNodeInterface */ - public function resolve(iDatasetNode $meta, UriNormalizer $normalizer, - bool $normalize, ?iTerm $subject = null): iDatasetNode { + public function resolve(DatasetNodeInterface $meta, + UriNormalizer $normalizer, bool $normalize, + ?TermInterface $subject = null): DatasetNodeInterface { $values = $this->resolvePath($meta, $normalizer, $subject); $this->filter($values->getDataset()); $this->processLang($values->getDataset()); @@ -143,15 +144,16 @@ public function resolve(iDatasetNode $meta, UriNormalizer $normalizer, /** * - * @param DatasetNode $meta + * @param DatasetNodeInterface $meta * @param UriNormalizer $normalizer UriNormalizer object allowing for * recursive resolution of URIs. - * @param iTerm|null $subject optional triples subject to be enforeced on + * @param TermInterface|null $subject optional triples subject to be enforeced on * ther returned data - * @return DatasetNode + * @return DatasetNodeInterface */ - public function resolvePath(iDatasetNode $meta, UriNormalizer $normalizer, - ?iTerm $subject = null): iDatasetNode { + public function resolvePath(DatasetNodeInterface $meta, + UriNormalizer $normalizer, + ?TermInterface $subject = null): DatasetNodeInterface { $subject ??= $meta->getNode(); if (!empty($this->value)) { $data = new Dataset(); @@ -159,26 +161,26 @@ public function resolvePath(iDatasetNode $meta, UriNormalizer $normalizer, } else { $data = $this->resolveRecursively($meta->getDataset(), $meta->getNode(), $normalizer, $this->path); // fix their subject and map the predicate - $data->forEach(fn(iQuad $x) => $x->withSubject($subject)->withPredicate($this->property)); + $data->forEach(fn(QuadInterface $x) => $x->withSubject($subject)->withPredicate($this->property)); } return $meta->withDataset($data)->withNode($subject); } - public function getProperty(): iNamedNode { + public function getProperty(): NamedNodeInterface { return $this->property; } - private function filter(iDataset $values): void { + private function filter(DatasetInterface $values): void { $match = $this->match; $skip = $this->skip; if (empty($match) && empty($skip)) { return; } $skip = empty($skip) ? '^$' : $skip; - $values->deleteExcept(fn(iQuad $x) => preg_match("`$match`", $x->getObject()->getValue()) && !preg_match("`$skip`", $x->getObject()->getValue())); + $values->deleteExcept(fn(QuadInterface $x) => preg_match("`$match`", $x->getObject()->getValue()) && !preg_match("`$skip`", $x->getObject()->getValue())); } - private function processLang(iDataset $meta): void { + private function processLang(DatasetInterface $meta): void { if ($this->type !== self::TYPE_LITERAL) { return; } @@ -186,13 +188,13 @@ private function processLang(iDataset $meta): void { $value = $this->langValue; $maxCount = $this->maxPerLang; $counts = []; - $meta->forEach(function (iQuad $x) use ($process, $value, $maxCount, - &$counts) { + $meta->forEach(function (QuadInterface $x) use ($process, $value, + $maxCount, &$counts) { $literal = $x->getObject(); - if (!($literal instanceof iLiteral)) { + if (!($literal instanceof LiteralInterface)) { return $x->withObject(DF::literal($literal->getValue())); } - /* @var $literal iLiteral */ + /* @var $literal LiteralInterface */ $lang = match ($process) { PropertyMapping::LANG_PASS => $literal->getLang(), PropertyMapping::LANG_ASSURE => $literal->getLang() ?? $value, @@ -209,15 +211,15 @@ private function processLang(iDataset $meta): void { }); } - private function normalize(iDataset $meta, UriNormalizer $normalizer): void { - $meta->forEach(function (iQuad $x) use ($normalizer) { + private function normalize(DatasetInterface $meta, UriNormalizer $normalizer): void { + $meta->forEach(function (QuadInterface $x) use ($normalizer) { $obj = $x->getObject(); - if (!($obj instanceof iLiteral) && !($obj instanceof iNamedNode)) { + if (!($obj instanceof LiteralInterface) && !($obj instanceof NamedNodeInterface)) { return $x; } try { $val = $normalizer->normalize($obj->getValue()); - if ($obj instanceof iNamedNode) { + if ($obj instanceof NamedNodeInterface) { $val = DF::namedNode($val); } else { $val = DF::literal($val, $obj->getLang(), $obj->getDatatype()); @@ -231,14 +233,15 @@ private function normalize(iDataset $meta, UriNormalizer $normalizer): void { /** * - * @param iDatasetLQP $meta - * @param iNamedNode $sbj + * @param DatasetInterface $meta + * @param NamedNodeInterface $sbj * @param UriNormalizer $normalizer - * @param array $path - * @return Dataset + * @param array $path + * @return DatasetInterface */ - private function resolveRecursively(iDatasetLQP $meta, iNamedNode $sbj, - UriNormalizer $normalizer, array $path): Dataset { + private function resolveRecursively(DatasetInterface $meta, + NamedNodeInterface $sbj, + UriNormalizer $normalizer, array $path): DatasetInterface { if (count($path) < 2) { return $meta->copy(new QT($sbj, $path[0])); } diff --git a/src/acdhOeaw/arche/refSources/PropertyMappings.php b/src/acdhOeaw/arche/refSources/PropertyMappings.php index e1dd9d1..865fa6b 100644 --- a/src/acdhOeaw/arche/refSources/PropertyMappings.php +++ b/src/acdhOeaw/arche/refSources/PropertyMappings.php @@ -34,7 +34,7 @@ use zozlak\RdfConstants as RDF; use termTemplates\QuadTemplate as QT; use quickRdf\DataFactory as DF; -use rdfInterface\QuadInterface as iQuad; +use rdfInterface\QuadInterface; /** * Description of PropertyMappings @@ -98,7 +98,7 @@ public function getRule(string $dbName): UriNormalizerRule { /** * - * @return array + * @return array */ public function mapIdentifiers(DatasetNodeInterface $meta, ?string $dbName = null): array { @@ -155,7 +155,7 @@ private function getId(string $dbName, string $class): string { private function getClasses(DatasetNodeInterface $meta, ?string $dbName): array { $dbName ??= $this->matchExternalDatabase($meta->getNode()->getValue()); $classes = $meta->getDataset()->copy(new QT($meta->getNode(), DF::namedNode(RDF::RDF_TYPE))); - $classes = array_map(fn(iQuad $x) => $this->getId($dbName, $x->getObject()->getValue()), iterator_to_array($classes)); + $classes = array_map(fn(QuadInterface $x) => $this->getId($dbName, $x->getObject()->getValue()), iterator_to_array($classes)); $classes = array_intersect($classes, array_keys($this->mappings)); return $classes; }