Skip to content

Commit

Permalink
Adjusted to rdfInterface v2
Browse files Browse the repository at this point in the history
  • Loading branch information
zozlak committed Jul 26, 2023
1 parent 4f32fd3 commit 4984596
Show file tree
Hide file tree
Showing 9 changed files with 88 additions and 65 deletions.
8 changes: 4 additions & 4 deletions arche-ref-sources
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@
use zozlak\argparse\ArgumentParser as AP;
use quickRdf\DataFactory as DF;
use quickRdf\Dataset;
use quickRdf\DatasetNode;
use quickRdfIo\Util as ioUtil;
use rdfInterface2easyRdf\AsRdfInterface;
use rdfHelpers\DatasetNode;
use acdhOeaw\UriNormalizer;
use acdhOeaw\UriNormRules;
use acdhOeaw\UriNormalizerRule;
Expand Down Expand Up @@ -68,8 +68,8 @@ $parser->addArgument('--id', help: 'process only resource with a given id');
$parser->addArgument('--extDbName', help: 'process only resource from a given external authority file');
$parser->addArgument('--repoUrl', help: 'use a given repository instance (overwrites the `repositoryUrl` property read from the config file)');
$parser->addArgument('--inputFile', help: 'read resources from a given RDF file instead of the ARCHE repository');
$parser->addArgument('--user', help: 'user name used for repository authentication (not important if --test or --resolveOnly are used)');
$parser->addArgument('--pswd', help: 'password used for repository authentication (not important if --test or --resolveOnly are used)');
$parser->addArgument('--user', help: 'user name used for repository authentication (not important in mode "test", "parse" and "resolve")');
$parser->addArgument('--pswd', help: 'password used for repository authentication (not important in mode "test", "parse" and "resolve")');
$parser->addArgument('--mode', default: 'parse', choices: array_keys($modes), help: "operation mode\n - resolve - only try to resolve the external URI (finds broken external URIs)\n - parse [default] - resolve the external URI and parse the output (when used with --verbose and/or --output it allows to inspect the data provided by the external source and test the metadata mapping defined in the configuration file)\n - test - tries to update the repository resource with data fetched from the external source (so doorkeeper checks are performed) but rolls back the update no matter if it was successful or not\n - update - updates the repository resource with data fetched from the external source\n");
$parser->addArgument('--verbose', action: AP::ACTION_STORE_TRUE, help: 'provide more verbose output, especially print the data fetched from the external reference source');
$parser->addArgument('--output', help: "when used, the data to be saved to the repository is also saved in a TTL file (the output isn't created in mode=resolve)");
Expand Down Expand Up @@ -177,7 +177,7 @@ foreach ($cfg->referenceSources as $extDbName => $extDbCfg) {
// don't even try to resolve identifiers for which there's no mapping
$mappings->matchExternalDatabase($id);
echo " fetching data for $id\n";
$meta = AsRdfInterface::addDatasetNode($normalizer->fetch($id), new DF(), fn($x) => new DatasetNode(new Dataset(), $x));
$meta = AsRdfInterface::addDatasetNode($normalizer->fetch($id), new DF(), fn($x) => new DatasetNode($x));
$uriStr = $meta->getNode()->getValue();
$extDbName = $mappings->matchExternalDatabase($uriStr);
if (!isset($entityExtMeta[$extDbName])) {
Expand Down
10 changes: 5 additions & 5 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@
"acdh-oeaw/uri-normalizer": "^2",
"zozlak/argparse": "^1",
"sweetrdf/quick-rdf-io": "^1",
"sweetrdf/quick-rdf": "^1",
"sweetrdf/quick-rdf": "^2.0.0-RC1",
"sweetrdf/rdf-interface": "^2.0.0-RC1",
"sweetrdf/term-templates": "^1",
"sweetrdf/rdfinterface2easyrdf": "^0.2"
"sweetrdf/rdfinterface2easyrdf": "^0.3"
},
"autoload": {
"psr-4": {
Expand All @@ -31,7 +32,7 @@
},
"require-dev": {
"phpunit/phpunit": "^9",
"phpstan/phpstan": "^1"
"phpstan/phpstan": "*"
},
"autoload-dev": {
"psr-4": {
Expand All @@ -40,6 +41,5 @@
},
"bin": [
"arche-ref-sources"
],
"minimum-stability": "RC"
]
}
2 changes: 1 addition & 1 deletion src/acdhOeaw/arche/refSources/NamedEntityFile.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
namespace acdhOeaw\arche\refSources;

use quickRdf\DataFactory as DF;
use rdfHelpers\DatasetNode;
use quickRdf\DatasetNode;
use termTemplates\QuadTemplate as QT;
use acdhOeaw\UriNormalizer;
use acdhOeaw\UriNormalizerException;
Expand Down
11 changes: 8 additions & 3 deletions src/acdhOeaw/arche/refSources/NamedEntityInterface.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
namespace acdhOeaw\arche\refSources;

use acdhOeaw\UriNormalizer;
use rdfHelpers\DatasetNode;
use rdfInterface\DatasetNodeInterface;

/**
*
Expand All @@ -44,7 +44,12 @@ public function getUri(): string;
*/
public function getIdentifiers(UriNormalizer $normalizer): array;

public function getMetadata(): DatasetNode;
public function getMetadata(): DatasetNodeInterface;

public function updateMetadata(DatasetNode $meta): array;
/**
*
* @param DatasetNodeInterface $meta
* @return array<string>
*/
public function updateMetadata(DatasetNodeInterface $meta): array;
}
13 changes: 11 additions & 2 deletions src/acdhOeaw/arche/refSources/NamedEntityIteratorFile.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@
namespace acdhOeaw\arche\refSources;

use quickRdf\Dataset;
use quickRdf\DatasetNode;
use quickRdf\DataFactory;
use termTemplates\QuadTemplate as QT;
use termTemplates\ValueTemplate as VT;
use rdfHelpers\DatasetNode;
use quickRdfIo\Util as ioUtil;
use zozlak\RdfConstants as RDF;
use acdhOeaw\arche\lib\Schema;
Expand All @@ -46,8 +46,17 @@ class NamedEntityIteratorFile implements NamedEntityIteratorInterface {
private Dataset $graph;
private Schema $schema;
private Repo $repo;

/**
*
* @var array<\termTemplates\QuadTemplate>
*/
private array $filters = [];
private ?int $limit = null;
/**
*
* @var array<\rdfInterface\TermInterface>
*/
private array $matching;

public function __construct(string $rdfFilePath, Repo $repo) {
Expand Down Expand Up @@ -79,7 +88,7 @@ public function getNamedEntities(): \Generator {
}
foreach ($this->matching as $i) {
$meta = $this->graph->copy(new QT($i));
$meta = new DatasetNode($meta, $i);
$meta = DatasetNode::factory($i)->withDataset($meta);
yield new NamedEntityFile($meta, $this, $this->repo);
}
}
Expand Down
8 changes: 4 additions & 4 deletions src/acdhOeaw/arche/refSources/NamedEntityRepo.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@

namespace acdhOeaw\arche\refSources;

use quickRdf\Dataset;
use quickRdf\DataFactory;
use rdfHelpers\DatasetNode;
use quickRdf\DatasetNode;
use acdhOeaw\UriNormalizer;
use acdhOeaw\UriNormalizerException;
use acdhOeaw\arche\lib\Repo;
use acdhOeaw\arche\lib\RepoResource;
use rdfInterface\DatasetNodeInterface;
use rdfInterface2easyRdf\AsRdfInterface;

/**
Expand All @@ -52,11 +52,11 @@ public function __construct(RepoResource $res) {
$this->repo = $res->getRepo();
}

public function getMetadata(): DatasetNode {
public function getMetadata(): DatasetNodeInterface {
return AsRdfInterface::addDatasetNode(
$this->res->getMetadata(),
new DataFactory(),
fn($x) => new DatasetNode(new Dataset(), $x)
fn($x) => DatasetNode::factory($x)
);
}

Expand Down
10 changes: 8 additions & 2 deletions src/acdhOeaw/arche/refSources/NamedEntityTrait.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
namespace acdhOeaw\arche\refSources;

use quickRdf\DataFactory;
use rdfHelpers\DatasetNode;
use acdhOeaw\arche\lib\RepoResource;
use rdfInterface\DatasetNodeInterface;
use rdfInterface2easyRdf\AsEasyRdf;
use termTemplates\QuadTemplate as QT;
use acdhOeaw\arche\lib\SearchTerm;
Expand All @@ -45,7 +45,13 @@ trait NamedEntityTrait {

private Repo $repo;

public function updateMetadata(DatasetNode $meta, bool $test = true): array {
/**
*
* @param DatasetNodeInterface $meta
* @param bool $test
* @return array<string>
*/
public function updateMetadata(DatasetNodeInterface $meta, bool $test = true): array {
if ($this->repo->inTransaction()) {
$this->repo->rollback();
}
Expand Down
85 changes: 44 additions & 41 deletions src/acdhOeaw/arche/refSources/PropertyMapping.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,16 @@
namespace acdhOeaw\arche\refSources;

use RuntimeException;
use rdfInterface\LiteralInterface as iLiteral;
use rdfInterface\NamedNodeInterface as iNamedNode;
use rdfInterface\TermInterface as iTerm;
use rdfInterface\QuadInterface as iQuad;
use rdfInterface\DatasetInterface as iDataset;
use rdfInterface\DatasetListQuadPartsInterface as iDatasetLQP;
use rdfInterface\DatasetNodeInterface as iDatasetNode;
use rdfInterface\LiteralInterface;
use rdfInterface\NamedNodeInterface;
use rdfInterface\TermInterface;
use rdfInterface\QuadInterface;
use rdfInterface\DatasetInterface;
use rdfInterface\DatasetNodeInterface;
use rdfInterface2easyRdf\AsRdfInterface;
use quickRdf\Dataset;
use quickRdf\DataFactory as DF;
use rdfHelpers\DatasetNode;
use quickRdf\DatasetNode;
use termTemplates\QuadTemplate as QT;
use termTemplates\NamedNodeTemplate;
use termTemplates\ValueTemplate;
Expand All @@ -62,19 +61,19 @@ class PropertyMapping {
const TYPE_LITERAL = 'literal';
const TYPE_RESOURCE = 'resource';

private iNamedNode $property;
private NamedNodeInterface $property;
private string $type;
private string $action;
private string $langProcess;
private string $langValue;
private int $maxPerLang;
private string $match;
private string $skip;
private iTerm $value;
private TermInterface $value;

/**
*
* @var array<iNamedNode>
* @var array<NamedNodeInterface>
*/
private array $path;
private DF $termsFactory;
Expand All @@ -96,7 +95,8 @@ public function __construct(object $cfg) {
$this->termsFactory = new DF();
}

public function resolveAndMerge(iDatasetNode $meta, iDatasetNode $extDbMeta,
public function resolveAndMerge(DatasetNodeInterface $meta,
DatasetNodeInterface $extDbMeta,
UriNormalizer $normalizer, bool $normalize): void {
$extDbMeta = $this->resolve($extDbMeta, $normalizer, $normalize, $meta->getNode());
if (count($extDbMeta->getDataset()) === 0) {
Expand All @@ -123,15 +123,16 @@ public function resolveAndMerge(iDatasetNode $meta, iDatasetNode $extDbMeta,

/**
*
* @param iDatasetNode $meta
* @param DatasetNodeInterface $meta
* @param UriNormalizer $normalizer UriNormalizer object allowing for
* recursive resolution of URIs.
* @param iTerm|null $subject optional triples subject to be enforced on
* @param TermInterface|null $subject optional triples subject to be enforced on
* the returned data
* @return DatasetNode
* @return DatasetNodeInterface
*/
public function resolve(iDatasetNode $meta, UriNormalizer $normalizer,
bool $normalize, ?iTerm $subject = null): iDatasetNode {
public function resolve(DatasetNodeInterface $meta,
UriNormalizer $normalizer, bool $normalize,
?TermInterface $subject = null): DatasetNodeInterface {
$values = $this->resolvePath($meta, $normalizer, $subject);
$this->filter($values->getDataset());
$this->processLang($values->getDataset());
Expand All @@ -143,56 +144,57 @@ public function resolve(iDatasetNode $meta, UriNormalizer $normalizer,

/**
*
* @param DatasetNode $meta
* @param DatasetNodeInterface $meta
* @param UriNormalizer $normalizer UriNormalizer object allowing for
* recursive resolution of URIs.
* @param iTerm|null $subject optional triples subject to be enforced on
* @param TermInterface|null $subject optional triples subject to be enforced on
* the returned data
* @return DatasetNode
* @return DatasetNodeInterface
*/
public function resolvePath(iDatasetNode $meta, UriNormalizer $normalizer,
?iTerm $subject = null): iDatasetNode {
public function resolvePath(DatasetNodeInterface $meta,
UriNormalizer $normalizer,
?TermInterface $subject = null): DatasetNodeInterface {
$subject ??= $meta->getNode();
if (!empty($this->value)) {
$data = new Dataset();
$data->add(DF::quad($subject, $this->property, $this->value));
} else {
$data = $this->resolveRecursively($meta->getDataset(), $meta->getNode(), $normalizer, $this->path);
// fix their subject and map the predicate
$data->forEach(fn(iQuad $x) => $x->withSubject($subject)->withPredicate($this->property));
$data->forEach(fn(QuadInterface $x) => $x->withSubject($subject)->withPredicate($this->property));
}
return $meta->withDataset($data)->withNode($subject);
}

public function getProperty(): iNamedNode {
public function getProperty(): NamedNodeInterface {
return $this->property;
}

private function filter(iDataset $values): void {
private function filter(DatasetInterface $values): void {
$match = $this->match;
$skip = $this->skip;
if (empty($match) && empty($skip)) {
return;
}
$skip = empty($skip) ? '^$' : $skip;
$values->deleteExcept(fn(iQuad $x) => preg_match("`$match`", $x->getObject()->getValue()) && !preg_match("`$skip`", $x->getObject()->getValue()));
$values->deleteExcept(fn(QuadInterface $x) => preg_match("`$match`", $x->getObject()->getValue()) && !preg_match("`$skip`", $x->getObject()->getValue()));
}

private function processLang(iDataset $meta): void {
private function processLang(DatasetInterface $meta): void {
if ($this->type !== self::TYPE_LITERAL) {
return;
}
$process = $this->langProcess;
$value = $this->langValue;
$maxCount = $this->maxPerLang;
$counts = [];
$meta->forEach(function (iQuad $x) use ($process, $value, $maxCount,
&$counts) {
$meta->forEach(function (QuadInterface $x) use ($process, $value,
$maxCount, &$counts) {
$literal = $x->getObject();
if (!($literal instanceof iLiteral)) {
if (!($literal instanceof LiteralInterface)) {
return $x->withObject(DF::literal($literal->getValue()));
}
/* @var $literal iLiteral */
/* @var $literal LiteralInterface */
$lang = match ($process) {
PropertyMapping::LANG_PASS => $literal->getLang(),
PropertyMapping::LANG_ASSURE => $literal->getLang() ?? $value,
Expand All @@ -209,15 +211,15 @@ private function processLang(iDataset $meta): void {
});
}

private function normalize(iDataset $meta, UriNormalizer $normalizer): void {
$meta->forEach(function (iQuad $x) use ($normalizer) {
private function normalize(DatasetInterface $meta, UriNormalizer $normalizer): void {
$meta->forEach(function (QuadInterface $x) use ($normalizer) {
$obj = $x->getObject();
if (!($obj instanceof iLiteral) && !($obj instanceof iNamedNode)) {
if (!($obj instanceof LiteralInterface) && !($obj instanceof NamedNodeInterface)) {
return $x;
}
try {
$val = $normalizer->normalize($obj->getValue());
if ($obj instanceof iNamedNode) {
if ($obj instanceof NamedNodeInterface) {
$val = DF::namedNode($val);
} else {
$val = DF::literal($val, $obj->getLang(), $obj->getDatatype());
Expand All @@ -231,14 +233,15 @@ private function normalize(iDataset $meta, UriNormalizer $normalizer): void {

/**
*
* @param iDatasetLQP $meta
* @param iNamedNode $sbj
* @param DatasetInterface $meta
* @param NamedNodeInterface $sbj
* @param UriNormalizer $normalizer
* @param array<iNamedNode> $path
* @return Dataset
* @param array<NamedNodeInterface> $path
* @return DatasetInterface
*/
private function resolveRecursively(iDatasetLQP $meta, iNamedNode $sbj,
UriNormalizer $normalizer, array $path): Dataset {
private function resolveRecursively(DatasetInterface $meta,
NamedNodeInterface $sbj,
UriNormalizer $normalizer, array $path): DatasetInterface {
if (count($path) < 2) {
return $meta->copy(new QT($sbj, $path[0]));
}
Expand Down
Loading

0 comments on commit 4984596

Please sign in to comment.