diff --git a/modules/islandora_iiif/README.md b/modules/islandora_iiif/README.md index c1f89872c..7cbcc8840 100644 --- a/modules/islandora_iiif/README.md +++ b/modules/islandora_iiif/README.md @@ -38,6 +38,21 @@ This module implements a Views Style plugin. It provides the following settings: 1. Tile Source: A field that was added to the views list of fields with the image to be served. This should be a File or Image type field on a Media. 2. Structured Text field: This lets you specify a file field where OCR text with positional data, e.g., hOCR can be found. + +### Media Attributes from IIIF Action + +The module also provides an action that lets a site owner populate a TIFF or JP2 image's width and +height attributes into fields so the IIIF server is not bogged down trying to generate a manifest if +it doesn't have them. + +To use it, either: + +- Add it as a derivative reaction to a node with an Original File as its child, or +- Use it as a batch action, such as on a Paged Content object's list of child pages. + +The action assumes the media type has fields with machine names of field_height and +field_width. Making this configurable would mean they would not appear +on entity list pages. ## Documentation Official documentation is available on the [Islandora 8 documentation site](https://islandora.github.io/documentation/). 
diff --git a/modules/islandora_iiif/config/optional/system.action.media_attributes_from_iiif_action.yml b/modules/islandora_iiif/config/optional/system.action.media_attributes_from_iiif_action.yml
new file mode 100644
index 000000000..36e7c8f9b
--- /dev/null
+++ b/modules/islandora_iiif/config/optional/system.action.media_attributes_from_iiif_action.yml
@@ -0,0 +1,10 @@
+langcode: en
+status: true
+dependencies:
+  module:
+    - islandora_iiif
+id: media_attributes_from_iiif_action
+label: 'Media attributes from IIIF'
+type: node
+plugin: islandora_iiif:media_attributes_from_iiif_action:media
+configuration: { }
diff --git a/modules/islandora_iiif/config/schema/islandora_iiif.schema.yml b/modules/islandora_iiif/config/schema/islandora_iiif.schema.yml
index f9e870efa..69ed0d7a8 100644
--- a/modules/islandora_iiif/config/schema/islandora_iiif.schema.yml
+++ b/modules/islandora_iiif/config/schema/islandora_iiif.schema.yml
@@ -16,3 +16,18 @@ views.style.iiif_manifest:
       type: sequence
       sequence:
         type: string
+      label: "Tile source field(s)"
+    iiif_ocr_file_field:
+      type: sequence
+      sequence:
+        type: string
+      label: "IIIF hOCR file field"
+    structured_text_term:
+      type: string
+      label: "Structured text term"
+    getdimensions_from_sewrver:
+      type: boolean
+      label: "Retrieve image dimensions from IIIF server"
+    search_endpoint:
+      type: string
+      label: "Search endpoint path"
diff --git a/modules/islandora_iiif/islandora_iiif.install b/modules/islandora_iiif/islandora_iiif.install
new file mode 100644
index 000000000..6d79442c2
--- /dev/null
+++ b/modules/islandora_iiif/islandora_iiif.install
@@ -0,0 +1,18 @@
+getPath('islandora_iiif') . '/config/optional/' . $config_id .
'.yml';
+  $data = Yaml::parseFile($config_path);
+  \Drupal::configFactory()->getEditable($config_id)->setData($data)->save(TRUE);
+}
diff --git a/modules/islandora_iiif/islandora_iiif.services.yml b/modules/islandora_iiif/islandora_iiif.services.yml
new file mode 100644
index 000000000..fd39211cd
--- /dev/null
+++ b/modules/islandora_iiif/islandora_iiif.services.yml
@@ -0,0 +1,4 @@
+services:
+  islandora_iiif:
+    class: Drupal\islandora_iiif\IiifInfo
+    arguments: ['@config.factory', '@http_client', '@logger.channel.islandora', '@jwt.authentication.jwt']
diff --git a/modules/islandora_iiif/src/IiifInfo.php b/modules/islandora_iiif/src/IiifInfo.php
new file mode 100644
index 000000000..c39a3cee5
--- /dev/null
+++ b/modules/islandora_iiif/src/IiifInfo.php
@@ -0,0 +1,162 @@
+configFactory = $config_factory;
+
+    $this->iiifConfig = $this->configFactory->get('islandora_iiif.settings');
+    $this->httpClient = $http_client;
+    $this->logger = $channel;
+    $this->jwtAuth = $jwt_auth;
+  }
+
+  /**
+   * The IIIF base URL for an image.
+   *
+   * Visiting this URL will resolve to the info.json for the image.
+   *
+   * @param \Drupal\file\FileInterface $image
+   *   The image file entity.
+   *
+   * @return string
+   *   The absolute URL on the IIIF server.
+   */
+  public function baseUrl($image) {
+
+    if ($this->iiifConfig->get('use_relative_paths')) {
+      $file_url = ltrim($image->createFileUrl(TRUE), '/');
+    }
+    else {
+      $file_url = $image->createFileUrl(FALSE);
+    }
+
+    $iiif_address = $this->iiifConfig->get('iiif_server');
+    $iiif_url = rtrim((string) $iiif_address, '/') . '/' . urlencode($file_url);
+
+    return $iiif_url;
+  }
+
+  /**
+   * Retrieve an image's original dimensions via the IIIF server.
+   *
+   * @param \Drupal\file\FileInterface $file
+   *   The image file.
+   *
+   * @return array|false
+   *   The image dimensions as [$width, $height], or FALSE on failure.
+   */
+  public function getImageDimensions(FileInterface $file) {
+    $iiif_url = $this->baseUrl($file);
+    try {
+      $info_json = (string) $this->httpClient->request('get', $iiif_url, [
+        'headers' => [
+          'Authorization' => 'Bearer ' . $this->jwtAuth->generateToken(),
+        ],
+      ])->getBody();
+      $resource = json_decode($info_json, TRUE);
+      // The body may not be valid JSON or may lack dimensions; fall through
+      // to the FALSE return rather than raising undefined-index notices.
+      $width = $resource['width'] ?? NULL;
+      $height = $resource['height'] ?? NULL;
+      if (is_numeric($width) && is_numeric($height)) {
+        return [intval($width), intval($height)];
+      }
+    }
+    catch (ClientException | ConnectException | ServerException $e) {
+      $this->logger->info("Error getting image file dimensions from IIIF server: " . $e->getMessage());
+    }
+    return FALSE;
+  }
+
+  /**
+   * The IIIF URL for the full image scaled to fit maximum dimensions.
+   *
+   * Visiting this URL will resolve to the full image resized to the maximum
+   * dimensions given.
+   *
+   * @param \Drupal\file\FileInterface $image
+   *   The image entity.
+   * @param int $width
+   *   The maximum width of the image to be returned. 0 for no constraint.
+   * @param int $height
+   *   The maximum height of the image to be returned. 0 for no constraint.
+   *
+   * @return string
+   *   The IIIF URL to retrieve the full image with the given max dimensions.
+   *
+   * @see https://iiif.io/api/image/2.1/
+   *
+   * @todo Both values are interpolated into the "!w,h" size segment as-is;
+   *   confirm the IIIF server treats 0 as "no constraint".
+   */
+  public function getImageWithMaxDimensions($image, $width = 0, $height = 0) {
+    $base_url = $this->baseUrl($image);
+    return $base_url . "/full/!$width,$height/0/default.jpg";
+
+  }
+
+}
diff --git a/modules/islandora_iiif/src/Plugin/Action/MediaAttributesFromIiif.php b/modules/islandora_iiif/src/Plugin/Action/MediaAttributesFromIiif.php
new file mode 100644
index 000000000..2cb5c06ea
--- /dev/null
+++ b/modules/islandora_iiif/src/Plugin/Action/MediaAttributesFromIiif.php
@@ -0,0 +1,171 @@
+httpClient = $http_client;
+    $this->iiifInfo = $iiif_info;
+    $this->utils = $islandora_utils;
+    $this->mediaSource = $media_source;
+    $this->logger = $channel;
+  }
+
+  /**
+   * {@inheritdoc}
+   */
+  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
+    return new static(
+      $configuration,
+      $plugin_id,
+      $plugin_definition,
+      $container->get('entity_type.manager'),
+      $container->get('datetime.time'),
+      $container->get('http_client'),
+      $container->get('islandora_iiif'),
+      $container->get('islandora.utils'),
+      $container->get('islandora.media_source_service'),
+      $container->get('logger.channel.islandora')
+    );
+  }
+
+  /**
+   * {@inheritdoc}
+   */
+  public function execute($entity = NULL) {
+    // Get the original File media use term.
+    $original_file_term = $this->utils->getTermForUri('http://pcdm.org/use#OriginalFile');
+    if (!$entity || !$original_file_term) {
+      return;
+    }
+
+    $original_file_mids = $this->utils->getMediaReferencingNodeAndTerm($entity, $original_file_term);
+    if (empty($original_file_mids)) {
+      return;
+    }
+
+    // Ordinarily there shouldn't be more than one Original File media but
+    // it's not guaranteed.
+    foreach ($original_file_mids as $original_file_mid) {
+      /** @var \Drupal\media\MediaInterface|null $original_file_media */
+      $original_file_media = $this->entityTypeManager->getStorage('media')->load($original_file_mid);
+
+      // @todo Make field configurable. Low priority since this whole thing is a workaround for an Islandora limitation.
+      if (!$original_file_media
+        || !$original_file_media->hasField('field_width')
+        || !$original_file_media->hasField('field_height')) {
+        continue;
+      }
+
+      // Only TIFF and JP2 sources are measured; leave other media untouched.
+      $original_file = $this->mediaSource->getSourceFile($original_file_media);
+      if (!$original_file || !in_array($original_file->getMimeType(), ['image/tiff', 'image/jp2'], TRUE)) {
+        continue;
+      }
+
+      // Look the dimensions up per media so a failed lookup never stores
+      // FALSE or the previous media's values.
+      $dimensions = $this->iiifInfo->getImageDimensions($original_file);
+      if ($dimensions === FALSE) {
+        continue;
+      }
+
+      [$width, $height] = $dimensions;
+      $original_file_media->set('field_height', $height);
+      $original_file_media->set('field_width', $width);
+      $original_file_media->save();
+    }
+  }
+
+  /**
+   * {@inheritdoc}
+   */
+  public function access($object, AccountInterface $account = NULL, $return_as_object = FALSE) {
+
+    /** @var \Drupal\Core\Entity\EntityInterface $object */
+    return $object->access('update', $account, $return_as_object);
+  }
+
+}
diff --git a/modules/islandora_iiif/src/Plugin/views/style/IIIFManifest.php b/modules/islandora_iiif/src/Plugin/views/style/IIIFManifest.php
index 5a2fb63b3..241b48008 100644
--- a/modules/islandora_iiif/src/Plugin/views/style/IIIFManifest.php
+++ b/modules/islandora_iiif/src/Plugin/views/style/IIIFManifest.php
@@ -6,20 +6,20 @@ use Drupal\Core\Entity\EntityInterface;
 use Drupal\Core\Entity\EntityTypeManagerInterface;
 use Drupal\Core\Extension\ModuleHandlerInterface;
-use Drupal\Core\File\FileSystemInterface;
 use Drupal\Core\Field\FieldItemInterface;
+use Drupal\Core\File\FileSystemInterface;
 use Drupal\Core\Form\FormStateInterface;
 use Drupal\Core\Messenger\MessengerInterface;
 use Drupal\Core\Url;
+use Drupal\media\Entity\Media;
+use Drupal\islandora\IslandoraUtils;
+use Drupal\islandora_iiif\IiifInfo;
 use Drupal\views\Plugin\views\style\StylePluginBase;
 use Drupal\views\ResultRow;
 use GuzzleHttp\Client;
-use GuzzleHttp\Exception\ClientException;
-use GuzzleHttp\Exception\ConnectException;
-use GuzzleHttp\Exception\ServerException;
 use
Symfony\Component\DependencyInjection\ContainerInterface; -use Symfony\Component\Serializer\SerializerInterface; use Symfony\Component\HttpFoundation\Request; +use Symfony\Component\Serializer\SerializerInterface; /** * Provide serializer format for IIIF Manifest. @@ -35,6 +35,14 @@ */ class IIIFManifest extends StylePluginBase { + /** + * Islandora utility functions. + * + * @var \Drupal\islandora\IslandoraUtils + */ + protected $utils; + + /** * {@inheritdoc} */ @@ -59,6 +67,13 @@ class IIIFManifest extends StylePluginBase { */ protected $serializer; + /** + * The IIIF Info service. + * + * @var \Drupal\islandora_iiif\IiifInfo + */ + protected $iiifInfo; + /** * The request service. * @@ -104,7 +119,7 @@ class IIIFManifest extends StylePluginBase { /** * {@inheritdoc} */ - public function __construct(array $configuration, $plugin_id, $plugin_definition, SerializerInterface $serializer, Request $request, ImmutableConfig $iiif_config, EntityTypeManagerInterface $entity_type_manager, FileSystemInterface $file_system, Client $http_client, MessengerInterface $messenger, ModuleHandlerInterface $moduleHandler) { + public function __construct(array $configuration, $plugin_id, $plugin_definition, SerializerInterface $serializer, Request $request, ImmutableConfig $iiif_config, EntityTypeManagerInterface $entity_type_manager, FileSystemInterface $file_system, Client $http_client, MessengerInterface $messenger, ModuleHandlerInterface $moduleHandler, IslandoraUtils $utils, IiifInfo $iiif_info) { parent::__construct($configuration, $plugin_id, $plugin_definition); $this->serializer = $serializer; @@ -115,6 +130,8 @@ public function __construct(array $configuration, $plugin_id, $plugin_definition $this->httpClient = $http_client; $this->messenger = $messenger; $this->moduleHandler = $moduleHandler; + $this->utils = $utils; + $this->iiifInfo = $iiif_info; } /** @@ -132,7 +149,9 @@ public static function create(ContainerInterface $container, array $configuratio 
$container->get('file_system'), $container->get('http_client'), $container->get('messenger'), - $container->get('module_handler') + $container->get('module_handler'), + $container->get('islandora.utils'), + $container->get('islandora_iiif') ); } @@ -163,6 +182,11 @@ public function render() { $content_path = implode('/', $url_components); $iiif_base_id = $request_host . '/' . $content_path; + /** + * @var \Drupal\taxonomy\TermInterface|null + */ + $structured_text_term = $this->utils->getTermForUri($this->options['structured_text_term_uri']); + // @see https://iiif.io/api/presentation/2.1/#manifest $json += [ '@type' => 'sc:Manifest', @@ -182,7 +206,7 @@ public function render() { // For each row in the View result. foreach ($this->view->result as $row) { // Add the IIIF URL to the image to print out as JSON. - $canvases = $this->getTileSourceFromRow($row, $iiif_address, $iiif_base_id); + $canvases = $this->getTileSourceFromRow($row, $iiif_address, $iiif_base_id, $structured_text_term); foreach ($canvases as $tile_source) { $json['sequences'][0]['canvases'][] = $tile_source; } @@ -192,6 +216,9 @@ public function render() { $content_type = 'json'; +// Add a search endpoint if one is defined +$this->addSearchEndpoint($json, $url_components); + // Give other modules a chance to alter the manifest. $this->moduleHandler->alter('islandora_iiif_manifest', $json, $this); @@ -208,11 +235,13 @@ public function render() { * @param string $iiif_base_id * The URL for the request, minus the last part of the URL, * which is likely "manifest". + * @param \Drupal\taxonomy\TermInterface|null $structured_text_term + * The term that structured text media references, if any. * * @return array * List of IIIF URLs to display in the Openseadragon viewer. 
*/
+  protected function getTileSourceFromRow(ResultRow $row, $iiif_address, $iiif_base_id, $structured_text_term) {
     $canvases = [];
     foreach (array_filter(array_values($this->options['iiif_tile_field'])) as $iiif_tile_field) {
       $viewsField = $this->view->field[$iiif_tile_field];
@@ -243,7 +272,10 @@ protected function getTileSourceFromRow(ResultRow $row, $iiif_address, $iiif_bas
           $canvas_id = $iiif_base_id . '/canvas/' . $entity->id();
           $annotation_id = $iiif_base_id . '/annotation/' . $entity->id();
 
-          [$width, $height] = $this->getCanvasDimensions($iiif_url, $image, $mime_type);
+          [$width, $height] = $this->getCanvasDimensions($iiif_url, $entity, $image, $mime_type);
+          if ($width == 0) {
+            continue;
+          }
 
           $tmp_canvas = [
             // @see https://iiif.io/api/presentation/2.1/#canvas
@@ -275,7 +307,7 @@ protected function getTileSourceFromRow(ResultRow $row, $iiif_address, $iiif_bas
             ],
           ];
 
-          if ($ocr_url = $this->getOcrUrl($entity, $row, $i)) {
+          if ($ocr_url = $this->getOcrUrl($entity, $structured_text_term)) {
             $tmp_canvas['seeAlso'] = [
               '@id' => $ocr_url,
               'format' => 'text/vnd.hocr+html',
@@ -312,42 +344,70 @@ protected function getTileSourceFromRow(ResultRow $row, $iiif_address, $iiif_bas
    * @return [string]
    *   The width and height of the image.
    */
-  protected function getCanvasDimensions(string $iiif_url, FieldItemInterface $image, string $mime_type) {
+  protected function getCanvasDimensions(string $iiif_url, Media $media, FieldItemInterface $image, string $mime_type) {
+
+    // If the media has field_height and field_width, return those values.
+    if ($media->hasField('field_height')
+      && !$media->get('field_height')->isEmpty()
+      && $media->get('field_height')->value > 0
+      && $media->hasField('field_width')
+      && !$media->get('field_width')->isEmpty()
+      && $media->get('field_width')->value > 0) {
+      return [intval($media->get('field_width')->value),
+        intval($media->get('field_height')->value),
+      ];
+    }
 
+    // Otherwise start looking at the field/file level for the numbers.
     if (isset($image->width) && is_numeric($image->width)
     && isset($image->height) && is_numeric($image->height)) {
-      return [intval($image->width), intval($image->height)];
+      return [intval($image->width),
+        intval($image->height),
+      ];
     }
 
-    try {
-      $info_json = $this->httpClient->get($iiif_url)->getBody();
-      $resource = json_decode($info_json, TRUE);
-      $width = $resource['width'];
-      $height = $resource['height'];
+    if (($properties = $image->getProperties())
+      && isset($properties['width']) && is_numeric($properties['width'])
+      && isset($properties['height']) && is_numeric($properties['height'])) {
+      return [intval($properties['width']),
+        intval($properties['height']),
+      ];
     }
-    catch (ClientException | ServerException | ConnectException $e) {
-      // If we couldn't get the info.json from IIIF
-      // try seeing if we can get it from Drupal.
-      if (empty($width) || empty($height)) {
-        // Get the image properties so we know the image width/height.
-        $properties = $image->getProperties();
-        $width = isset($properties['width']) ? $properties['width'] : 0;
-        $height = isset($properties['height']) ? $properties['height'] : 0;
-
-        // If this is a TIFF AND we don't know the width/height
-        // see if we can get the image size via PHP's core function.
-        if ($mime_type === 'image/tiff' && !$width || !$height) {
-          $uri = $image->entity->getFileUri();
-          $path = $this->fileSystem->realpath($uri);
-          $image_size = getimagesize($path);
-          if ($image_size) {
-            $width = $image_size[0];
-            $height = $image_size[1];
-          }
+
+    $entity = $image->entity;
+    if ($entity && $entity->hasField('field_height') && !$entity->get('field_height')->isEmpty()
+      && $entity->get('field_height')->value > 0
+      && $entity->hasField('field_width')
+      && !$entity->get('field_width')->isEmpty()
+      && $entity->get('field_width')->value > 0) {
+      return [intval($entity->get('field_width')->value),
+        intval($entity->get('field_height')->value),
+      ];
+    }
+
+    if ($entity && $mime_type === 'image/tiff') {
+      // If this is a TIFF AND we don't know the width/height
+      // see if we can get the image size via PHP's core function.
+      $uri = $entity->getFileUri();
+      $path = $this->fileSystem->realpath($uri);
+      if (!empty($path)) {
+        $image_size = getimagesize($path);
+        if ($image_size) {
+          return [intval($image_size[0]),
+            intval($image_size[1]),
+          ];
         }
       }
     }
-    return [$width, $height];
+
+    // As a last resort, get it from the IIIF server.
+    // This can be very slow and will fail if there are too many pages.
+    $dimensions = $entity ? $this->iiifInfo->getImageDimensions($entity) : FALSE;
+    if ($dimensions !== FALSE) {
+      return $dimensions;
+    }
+
+    return [0, 0];
   }
 
   /**
@@ -355,28 +415,36 @@ protected function getCanvasDimensions(string $iiif_url, FieldItemInterface $ima
    *
    * @param \Drupal\Core\Entity\EntityInterface $entity
    *   The entity at the current row.
-   * @param \Drupal\views\ResultRow $row
-   *   Result row.
-   * @param int $delta
-   *   The delta in case there are multiple canvases on one media.
+   * @param \Drupal\taxonomy\TermInterface|null $structured_text_term
+   *   The term that structured text media references, if any.
    *
    * @return string|false
    *   The absolute URL of the current row's structured text,
    *   or FALSE if none.
*/
+  protected function getOcrUrl(EntityInterface $entity, $structured_text_term) {
     $ocr_url = FALSE;
     $iiif_ocr_file_field = !empty($this->options['iiif_ocr_file_field']) ? array_filter(array_values($this->options['iiif_ocr_file_field'])) : [];
     $ocrField = count($iiif_ocr_file_field) > 0 ? $this->view->field[$iiif_ocr_file_field[0]] : NULL;
     if ($ocrField) {
-      $ocr_entity = $ocrField->getEntity($row);
+      $ocr_entity = $entity;
       $ocr_field_name = $ocrField->definition['field_name'];
       if (!is_null($ocr_field_name)) {
         $ocrs = $ocr_entity->{$ocr_field_name};
-        $ocr = isset($ocrs[$delta]) ? $ocrs[$delta] : FALSE;
-        if ($ocr) {
-          $ocr_url = $ocr->entity->createFileUrl(FALSE);
-        }
+        $ocr = $ocrs[0] ?? FALSE;
+        $ocr_url = ($ocr && $ocr->entity) ? $ocr->entity->createFileUrl(FALSE) : FALSE;
+      }
+    }
+    elseif ($structured_text_term) {
+      $parent_node = $this->utils->getParentNode($entity);
+      $ocr_entity_array = $parent_node ? $this->utils->getMediaReferencingNodeAndTerm($parent_node, $structured_text_term) : NULL;
+      $ocr_entity_id = is_array($ocr_entity_array) ? array_shift($ocr_entity_array) : NULL;
+      $ocr_entity = $ocr_entity_id ? $this->entityTypeManager->getStorage('media')->load($ocr_entity_id) : NULL;
+      if ($ocr_entity) {
+        $ocr_file_source = $ocr_entity->getSource();
+        $ocr_fid = $ocr_file_source->getSourceFieldValue($ocr_entity);
+        $ocr_file = $ocr_fid ? $this->entityTypeManager->getStorage('file')->load($ocr_fid) : NULL;
+        $ocr_url = $ocr_file ? $ocr_file->createFileUrl(FALSE) : FALSE;
       }
     }
 
@@ -411,6 +479,33 @@ public function getEntityTitle(string $content_path): string {
     return $entity_title;
   }
 
+  /**
+   * Adds an IIIF search service to the manifest when an endpoint is set.
+   *
+   * @param array $json
+   *   The manifest array; a "service" entry is appended in place.
+   * @param array $url_components
+   *   URL components of the current request; index 1 replaces "%node".
+   */
+  protected function addSearchEndpoint(array &$json, array $url_components) {
+    $hocr_search_path = $this->options['search_endpoint'] ?? '';
+    // Nothing to advertise when no search endpoint path is configured.
+    if (empty($hocr_search_path)) {
+      return;
+    }
+
+    $url_base = $this->getRequest()->getSchemeAndHttpHost();
+    $hocr_search_url = $url_base . '/' . ltrim($hocr_search_path, '/');
+    $hocr_search_url = str_replace('%node', $url_components[1] ?? '', $hocr_search_url);
+
+    $json['service'][] = [
+      "@context" => "http://iiif.io/api/search/0/context.json",
+      "@id" => $hocr_search_url,
+      "profile" => "http://iiif.io/api/search/0/search",
+      "label" => $this->t("Search inside this work"),
+    ];
+  }
+
   /**
    * {@inheritdoc}
    */
@@ -479,10 +574,26 @@ public function buildOptionsForm(&$form, FormStateInterface $form_state) {
       '#title' => $this->t('Structured OCR data file field'),
       '#type' => 'checkboxes',
       '#default_value' => $this->options['iiif_ocr_file_field'],
-      '#description' => $this->t('The source of structured OCR text for each entity.'),
+      '#description' => $this->t('The source of structured OCR text for each entity. If the term setting below is left blank, it will be the same entity as the source image'),
       '#options' => $field_options,
       '#required' => FALSE,
     ];
+    $form['structured_text_term'] = [
+      '#type' => 'entity_autocomplete',
+      '#target_type' => 'taxonomy_term',
+      '#title' => $this->t('Structured OCR text term'),
+      '#default_value' => !empty($this->options['structured_text_term_uri']) ? $this->utils->getTermForUri($this->options['structured_text_term_uri']) : NULL,
+      '#required' => FALSE,
+      '#description' => $this->t('Term indicating the media that holds structured text, such as hOCR, for the given object. Use this if the text is on a separate media from the tile source.'),
+    ];
+
+    $form['search_endpoint'] = [
+      '#type' => 'textfield',
+      '#title' => $this->t("Search endpoint path."),
+      '#description' => $this->t("If there is a search endpoint to search within the book that returns IIIF annotations, put it here. Use substitutions %node and %keywords. E.g., paged-content-search/%node?search-in-pages=%keywords"),
+      '#default_value' => $this->options['search_endpoint'] ?? '',
+      '#required' => FALSE,
+    ];
   }
 
   /**
@@ -495,4 +606,26 @@ public function getFormats() {
     return ['json' => 'json'];
   }
 
+  /**
+   * Submit handler for options form.
+   *
+   * Used to store the structured text media term by URI instead of term ID.
+   *
+   * @param array $form
+   *   The form.
+   * @param \Drupal\Core\Form\FormStateInterface $form_state
+   *   The form state object.
+   */
+  // @codingStandardsIgnoreStart
+  public function submitOptionsForm(&$form, FormStateInterface $form_state) {
+    // @codingStandardsIgnoreEnd
+    $style_options = $form_state->getValue('style_options');
+    // An empty autocomplete means no term was chosen; store no URI then.
+    $tid = $style_options['structured_text_term'] ?? NULL;
+    $term = $tid ? $this->entityTypeManager->getStorage('taxonomy_term')->load($tid) : NULL;
+    $style_options['structured_text_term_uri'] = $term ? $this->utils->getUriForTerm($term) : NULL;
+    $form_state->setValue('style_options', $style_options);
+    parent::submitOptionsForm($form, $form_state);
+  }
+
 }