Skip to content

Commit

Permalink
WIP on #44.
Browse files Browse the repository at this point in the history
  • Loading branch information
mjordan committed Aug 24, 2020
1 parent 26099bf commit f5a6637
Show file tree
Hide file tree
Showing 4 changed files with 173 additions and 19 deletions.
3 changes: 3 additions & 0 deletions sample_islandora_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ gemini_auth_header: 'Bearer islandora'
# Absolute or relative to the Riprap application directory.
views_pager_data_file_path: 'var/fetchresourcelist.from.drupal.pager.txt'

# Required. Names of the Drupal media fields that can contain the file. If you use non-standard or custom media types, add the name of the field that holds their file to this list.
drupal_file_fieldnames: ['field_media_audio', 'field_media_document', 'field_edited_text', 'field_media_file', 'field_media_image', 'field_media_video_file']

plugins.fetchdigest: PluginFetchDigestFromFedoraAPI
fedoraapi_method: HEAD
fedoraapi_digest_header_leader_pattern: "^.+="
Expand Down
47 changes: 47 additions & 0 deletions sample_islandora_config_fetch_digest_from_drupal.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Sample Riprap config file for using Islandora/database plugins.
# Requires that the "Riprap resource list" View be enabled in
# the Islandora instance. This View is bundled with the Islandora Riprap
# module.
#
# This plugin is agnostic to which media have fixity event checks performed
# on them. The filters in the "Riprap resource list" View determine that.
# See the View's filter criteria GUI for examples.
#
# This configuration differs from sample_islandora_config.yml in that it gets
# the media file's digest from Drupal, not Fedora. To do this, Riprap calls the
# /islandora_riprap/checksum/{file_uuid}/{algorithm} endpoint added by the
# Islandora Riprap module.

####################
# General settings #
####################

# thin: true
# One of 'md5', 'sha1', or 'sha256'.
fixity_algorithm: sha1

# Absolute or relative to the Riprap application directory.
failures_log_path: 'var/riprap_failed_events.log'

###################
# Plugin settings #
###################

# Use this plugin if you want to use the 'Riprap resource (media) list' View provided by Islandora Riprap.
plugins.fetchresourcelist: ['PluginFetchResourceListFromDrupalView']
drupal_baseurl: 'http://localhost:8000'
drupal_user: admin
drupal_password: islandora
# Absolute or relative to the Riprap application directory.
views_pager_data_file_path: 'var/fetchresourcelist.from.drupal.pager.txt'
use_fedora_urls: false

# Required. Names of the Drupal media fields that can contain the file. If you use non-standard or custom media types, add the name of the field that holds their file to this list.
drupal_file_fieldnames: ['field_media_audio', 'field_media_document', 'field_edited_text', 'field_media_file', 'field_media_image', 'field_media_video_file']

plugins.fetchdigest: PluginFetchDigestFromDrupal

plugins.persist: PluginPersistToDatabase

plugins.postcheck: ['PluginPostCheckCopyFailures']

34 changes: 28 additions & 6 deletions src/Plugin/PluginFetchDigestFromDrupal.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,33 +19,55 @@ class PluginFetchDigestFromDrupal extends AbstractFetchDigestPlugin
* Gets the resource's digest from the REST endpoint provided by Islandora Riprap.
*
* @param string $resource_id
* The file's UUID.
* This is probably wrong.
* The media's Drupal media ID. From this, we get the file's URL
* using the following logic: with this media ID, we perform
* a REST request to get the media. That request's response
* contains an entry for the field that contains the file, e.g.
* field_media_audio, field_media_document, field_edited_text,
* field_media_file, field_media_image, field_media_video_file.
* Within that entry, we get the file's URL, and from that, we
* can get the file's digest.
*
* @return string $digest
* The digest value.
*
* @return string|bool
* The digest value, false on error.
*/
public function execute($resource_id)
{
if (isset($this->settings['drupal_baseurl'])) {
$this->drupal_base_url = $this->settings['drupal_baseurl'];
} else {
$this->drupal_base_url = 'http://localhost:8000';
}

if (isset($this->settings['fixity_algorithm'])) {
$this->fixity_algorithm = $this->settings['fixity_algorithm'];
} else {
$this->fixity_algorithm = 'sha1';
}

$this->drupal_file_fieldnames = $this->settings['drupal_file_fieldnames'];

$client = new \GuzzleHttp\Client();
// @todo: Wrap in try/catch.

// @todo: Request is to /islandora_riprap/checksum/{file_uuid}/{algorithm}, not to the resource ID as with Fedora.
$url = $resource_id;
if (!strlen($url)) {
if (!strlen($resource_id)) {
if ($this->logger) {
$this->logger->info("PluginFetchDigestFromDrupal exited due to empty resource ID.");
}
return;
}

$response = $client->request('GET', $url, [
// @todo: Request is to /islandora_riprap/checksum/{file_uuid}/{algorithm},
// not to the resource URI as with Fedora.
$get_digest_url = $this->drupal_base_url .
'/islandora_riprap/checksum/' . $resource_id . '/' . $this->fixity_algorithm;

$response = $client->request('GET', $get_digest_url, [
'http_errors' => false,
'auth' => [$this->drupal_user, $this->drupal_password]
]);
$status_code = $response->getStatusCode();
$allowed_codes = array(200);
Expand Down
108 changes: 95 additions & 13 deletions src/Plugin/PluginFetchResourceListFromDrupalView.php
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?php
// src/Plugin/PluginFetchResourceListFromDrupal.php
// src/Plugin/PluginFetchResourceListFromDrupalView.php

namespace App\Plugin;

Expand All @@ -12,6 +12,11 @@ public function execute()
{
$output = new ConsoleOutput();

if (isset($this->settings['fixity_algorithm'])) {
$this->fixity_algorithm = $this->settings['fixity_algorithm'];
} else {
$this->fixity_algorithm = 'sha1';
}
if (isset($this->settings['drupal_baseurl'])) {
$this->drupal_base_url = $this->settings['drupal_baseurl'];
} else {
Expand All @@ -27,6 +32,9 @@ public function execute()
} else {
$this->drupal_password = 'islandora';
}

$this->drupal_file_fieldnames = $this->settings['drupal_file_fieldnames'];

if (isset($this->settings['use_fedora_urls'])) {
$this->use_fedora_urls = $this->settings['use_fedora_urls'];
} else {
Expand Down Expand Up @@ -86,17 +94,29 @@ public function execute()
$output_resource_records = [];
foreach ($media_list as $media) {
if ($this->use_fedora_urls) {
// @todo: getFedoraUrl() returns false on failure, so build in logic here to log that
// @todo: getFileUrlFromFedora() returns false on failure, so build in logic here to log that
// the resource ID / URL cannot be found. (But, http responses are already logged in
// getFedoraUrl() so maybe we don't need to log here?)
$file_fedora_url = $this->getFedoraUrl($media['mid']);
// getFileUrlFromFedora() so maybe we don't need to log here?)
$file_fedora_url = $this->getFileUrlFromFedora($media['mid']);
if (strlen($file_fedora_url)) {
$resource_record_object = new \stdClass;
$resource_record_object->resource_id = $file_fedora_url;
$resource_record_object->last_modified_timestamp = $media['changed'];
$output_resource_records[] = $resource_record_object;
}
}
if (!$this->use_fedora_urls) {
// @todo: getFileUrlFromDrupal() returns false on failure, so build in logic here to log that
// the resource ID / URL cannot be found. (But, http responses are already logged in
// getFileUrlFromFedora() so maybe we don't need to log here?)
$file_drupal_url = $this->getFileUuidFromDrupal($media['mid']);
if (strlen($file_drupal_url)) {
$resource_record_object = new \stdClass;
$resource_record_object->resource_id = $file_drupal_url;
$resource_record_object->last_modified_timestamp = $media['changed'];
$output_resource_records[] = $resource_record_object;
}
}
}
$this->setPageNumber($page_number, $num_media);

Expand All @@ -115,9 +135,9 @@ public function execute()
* The media ID.
*
* @return string
* The Fedora URL corresponding to the UUID, or false.
* The Fedora URL corresponding to the media ID, or false.
*/
private function getFedoraUrl($mid)
private function getFileUrlFromFedora($mid)
{
// First, retrieve the media entity from Drupal.
$media_url = $this->drupal_base_url . '/media/' . $mid . '?_format=json';
Expand All @@ -128,11 +148,12 @@ private function getFedoraUrl($mid)
]);
$media_response_body = $media_response->getBody()->getContents();
$media_response_body = json_decode($media_response_body, true);
if (isset($media_response_body['field_media_image'])) {
$file_field = 'field_media_image';
}
if (isset($media_response_body['field_media_file'])) {
$file_field = 'field_media_file';

foreach ($this->drupal_file_fieldnames as $file_fieldname) {
if (isset($media_response_body[$file_fieldname])) {
$file_field = $file_fieldname;
break;
}
}
$target_file_uuid = $media_response_body[$file_field][0]['target_uuid'];

Expand All @@ -155,7 +176,7 @@ private function getFedoraUrl($mid)
} else {
if ($this->logger) {
$this->logger->error(
"PluginFetchResourceListFromDrupal could not get Fedora URL from Gemini.",
"PluginFetchResourceListFromDrupal could not get Fedora File URL from Gemini.",
array(
'HTTP response code' => $code
)
Expand All @@ -166,7 +187,7 @@ private function getFedoraUrl($mid)
} catch (Exception $e) {
if ($this->logger) {
$this->logger->error(
"PluginFetchResourceListFromDrupal could not get Fedora URL from Gemini.",
"PluginFetchResourceListFromDrupal could not get Fedora File URL from Gemini.",
array(
'HTTP response code' => $code,
'Exception message' => $e->getMessage()
Expand All @@ -177,6 +198,67 @@ private function getFedoraUrl($mid)
}
}

/**
 * Get the URL of a media's file from Drupal.
 *
 * Requests the media entity's JSON representation from Drupal, picks the
 * first field listed in $this->drupal_file_fieldnames that is present in
 * the response, and returns the 'url' property of that field's first item.
 *
 * NOTE(review): despite this method's name, the value returned is the file
 * field's 'url' property, not its 'target_uuid'. Confirm which of the two
 * the /islandora_riprap/checksum endpoint expects.
 *
 * @param string $mid
 *   The media ID.
 *
 * @return string|bool
 *   The value of the file field's 'url' property, or false if it could
 *   not be retrieved.
 */
private function getFileUuidFromDrupal($mid)
{
    // Initialized up front so the catch block can always log it, even when
    // the request throws before a status code is available.
    $code = null;

    try {
        // Retrieve the media entity from Drupal.
        $url = $this->drupal_base_url . '/media/' . $mid . '?_format=json';
        $client = new \GuzzleHttp\Client();
        $response = $client->request('GET', $url, [
            'http_errors' => false,
            'auth' => [$this->drupal_user, $this->drupal_password]
        ]);
        $code = $response->getStatusCode();

        // A missing media entity is not logged as an error.
        if ($code == 404) {
            return false;
        }

        if ($code != 200) {
            if ($this->logger) {
                $this->logger->error(
                    "PluginFetchResourceListFromDrupal could not get File URL from Drupal.",
                    array(
                        'HTTP response code' => $code
                    )
                );
            }
            return false;
        }

        // Read the body exactly once: getContents() returns only the
        // remaining bytes of the stream, so a second call yields ''.
        $body = json_decode($response->getBody()->getContents(), true);
        if (!is_array($body)) {
            if ($this->logger) {
                $this->logger->error(
                    "PluginFetchResourceListFromDrupal could not get File URL from Drupal.",
                    array(
                        'HTTP response code' => $code,
                        'Media ID' => $mid
                    )
                );
            }
            return false;
        }

        // Use the first configured file field that the media entity has.
        $file_field = null;
        foreach ($this->drupal_file_fieldnames as $file_fieldname) {
            if (isset($body[$file_fieldname])) {
                $file_field = $file_fieldname;
                break;
            }
        }

        if ($file_field === null || !isset($body[$file_field][0]['url'])) {
            if ($this->logger) {
                $this->logger->error(
                    "PluginFetchResourceListFromDrupal could not find a file field in the media entity.",
                    array(
                        'HTTP response code' => $code,
                        'Media ID' => $mid
                    )
                );
            }
            return false;
        }

        return $body[$file_field][0]['url'];
    } catch (\Exception $e) {
        // Fully qualified so the global Exception class is caught from
        // within the App\Plugin namespace.
        if ($this->logger) {
            $this->logger->error(
                "PluginFetchResourceListFromDrupal could not get File URL from Drupal.",
                array(
                    'HTTP response code' => $code,
                    'Exception message' => $e->getMessage()
                )
            );
        }
        return false;
    }
}

/**
* Sets the page offset to use in the next REST request to the Drupal View.
*
Expand Down

0 comments on commit f5a6637

Please sign in to comment.