Skip to content

Commit

Permalink
Add hOCR functionality (#1006)
Browse files Browse the repository at this point in the history
* Add hOCR functionality
* Fix wording in Islandora IIIF View Style config form.
* Islandora IIIF: Attempt to fix broken config schema.
* Islandora IIIF: Remove labels from config schema.
* Islandora IIIF: Add labels to config schema items.
* Update IIIFManifest.php
* Move to memorized term lookup, instead of polluting method call.
* Deal explicitly with URIs, instead of attempting to put the entity in config.
* hocr Islandora IIIF: Add missing null check.

---------

Co-authored-by: Alexander O'Neill <[email protected]>
Co-authored-by: Alexander O'Neill <[email protected]>
Co-authored-by: Rosie Le Faive <[email protected]>
Co-authored-by: Adam Vessey <[email protected]>
  • Loading branch information
5 people authored Apr 19, 2024
1 parent 089a365 commit 9b26616
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 11 deletions.
12 changes: 12 additions & 0 deletions modules/islandora_iiif/config/schema/islandora_iiif.schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,17 @@ views.style.iiif_manifest:
mapping:
iiif_tile_field:
type: sequence
label: "Tile source field(s)"
sequence:
type: string
iiif_ocr_file_field:
type: sequence
label: "Structured OCR data file field"
sequence:
type: string
structured_text_term_uri:
type: string
label: "Structured text term"
search_endpoint:
type: string
label: "Search endpoint path"
135 changes: 124 additions & 11 deletions modules/islandora_iiif/src/Plugin/views/style/IIIFManifest.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
use Drupal\Core\Form\FormStateInterface;
use Drupal\Core\Messenger\MessengerInterface;
use Drupal\Core\Url;
use Drupal\islandora\IslandoraUtils;
use Drupal\taxonomy\TermInterface;
use Drupal\views\Plugin\views\style\StylePluginBase;
use Drupal\views\ResultRow;
use GuzzleHttp\Client;
Expand All @@ -35,6 +37,13 @@
*/
class IIIFManifest extends StylePluginBase {

/**
* Islandora utility functions.
*
* @var \Drupal\islandora\IslandoraUtils
*/
protected $utils;

/**
* {@inheritdoc}
*/
Expand Down Expand Up @@ -108,10 +117,24 @@ class IIIFManifest extends StylePluginBase {
*/
protected $moduleHandler;

/**
* Memoized structured text term.
*
* @var \Drupal\taxonomy\TermInterface|null
*/
protected ?TermInterface $structuredTextTerm;

/**
* Flag to track if we _have_ attempted a lookup, as the value is nullable.
*
* @var bool
*/
protected bool $structuredTextTermMemoized = FALSE;

/**
* {@inheritdoc}
*/
public function __construct(array $configuration, $plugin_id, $plugin_definition, SerializerInterface $serializer, Request $request, ImmutableConfig $iiif_config, EntityTypeManagerInterface $entity_type_manager, FileSystemInterface $file_system, Client $http_client, MessengerInterface $messenger, ModuleHandlerInterface $moduleHandler) {
public function __construct(array $configuration, $plugin_id, $plugin_definition, SerializerInterface $serializer, Request $request, ImmutableConfig $iiif_config, EntityTypeManagerInterface $entity_type_manager, FileSystemInterface $file_system, Client $http_client, MessengerInterface $messenger, ModuleHandlerInterface $moduleHandler, IslandoraUtils $utils) {
parent::__construct($configuration, $plugin_id, $plugin_definition);

$this->serializer = $serializer;
Expand All @@ -121,6 +144,7 @@ public function __construct(array $configuration, $plugin_id, $plugin_definition
$this->fileSystem = $file_system;
$this->httpClient = $http_client;
$this->messenger = $messenger;
$this->utils = $utils;
$this->moduleHandler = $moduleHandler;
}

Expand All @@ -139,7 +163,8 @@ public static function create(ContainerInterface $container, array $configuratio
$container->get('file_system'),
$container->get('http_client'),
$container->get('messenger'),
$container->get('module_handler')
$container->get('module_handler'),
$container->get('islandora.utils')
);
}

Expand Down Expand Up @@ -217,6 +242,9 @@ public function render() {

$content_type = 'json';

// Add a search endpoint if one is defined.
$this->addSearchEndpoint($json, $url_components);

// Give other modules a chance to alter the manifest.
$this->moduleHandler->alter('islandora_iiif_manifest', $json, $this);

Expand Down Expand Up @@ -300,7 +328,7 @@ protected function getTileSourceFromRow(ResultRow $row, $iiif_address, $iiif_bas
],
];

if ($ocr_url = $this->getOcrUrl($entity, $row, $i)) {
if ($ocr_url = $this->getOcrUrl($entity)) {
$tmp_canvas['seeAlso'] = [
'@id' => $ocr_url,
'format' => 'text/vnd.hocr+html',
Expand Down Expand Up @@ -380,30 +408,38 @@ protected function getCanvasDimensions(string $iiif_url, FieldItemInterface $ima
*
* @param \Drupal\Core\Entity\EntityInterface $entity
* The entity at the current row.
* @param \Drupal\views\ResultRow $row
* Result row.
* @param int $delta
* The delta in case there are multiple canvases on one media.
*
* @return string|false
* The absolute URL of the current row's structured text,
* or FALSE if none.
*/
protected function getOcrUrl(EntityInterface $entity, ResultRow $row, $delta) {
protected function getOcrUrl(EntityInterface $entity) {
$ocr_url = FALSE;
$iiif_ocr_file_field = !empty($this->options['iiif_ocr_file_field']) ? array_filter(array_values($this->options['iiif_ocr_file_field'])) : [];
$ocrField = count($iiif_ocr_file_field) > 0 ? $this->view->field[$iiif_ocr_file_field[0]] : NULL;
if ($ocrField) {
$ocr_entity = $ocrField->getEntity($row);
$ocr_entity = $entity;
$ocr_field_name = $ocrField->definition['field_name'];
if (!is_null($ocr_field_name)) {
$ocrs = $ocr_entity->{$ocr_field_name};
$ocr = isset($ocrs[$delta]) ? $ocrs[$delta] : FALSE;
$ocr = $ocrs[0] ?? FALSE;
if ($ocr) {
$ocr_url = $ocr->entity->createFileUrl(FALSE);
}
}
}
elseif ($structured_text_term = $this->getStructuredTextTerm()) {
$parent_node = $this->utils->getParentNode($entity);
$ocr_entity_array = $this->utils->getMediaReferencingNodeAndTerm($parent_node, $structured_text_term);
$ocr_entity_id = is_array($ocr_entity_array) ? array_shift($ocr_entity_array) : NULL;
$ocr_entity = $ocr_entity_id ? $this->entityTypeManager->getStorage('media')->load($ocr_entity_id) : NULL;
if ($ocr_entity) {
$ocr_file_source = $ocr_entity->getSource();
$ocr_fid = $ocr_file_source->getSourceFieldValue($ocr_entity);
$ocr_file = $this->entityTypeManager->getStorage('file')->load($ocr_fid);
$ocr_url = $ocr_file->createFileUrl(FALSE);
}
}

return $ocr_url;
}
Expand Down Expand Up @@ -448,6 +484,29 @@ protected function defineOptions() {
return $options;
}

/**
* Add the configured search endpoint to the manifest.
*
* @param array $json
* The IIIF manifest.
* @param array $url_components
* The search endpoint URL as array.
*/
protected function addSearchEndpoint(array &$json, array $url_components) {
$url_base = $this->getRequest()->getSchemeAndHttpHost();
$hocr_search_path = $this->options['search_endpoint'];
$hocr_search_url = $url_base . '/' . ltrim($hocr_search_path, '/');

$hocr_search_url = str_replace('%node', $url_components[1], $hocr_search_url);

$json['service'][] = [
"@context" => "http://iiif.io/api/search/0/context.json",
"@id" => $hocr_search_url,
"profile" => "http://iiif.io/api/search/0/search",
"label" => t("Search inside this work"),
];
}

/**
* {@inheritdoc}
*/
Expand Down Expand Up @@ -504,10 +563,27 @@ public function buildOptionsForm(&$form, FormStateInterface $form_state) {
'#title' => $this->t('Structured OCR data file field'),
'#type' => 'checkboxes',
'#default_value' => $this->options['iiif_ocr_file_field'],
'#description' => $this->t('The source of structured OCR text for each entity.'),
'#description' => $this->t("If the hOCR is a field on the same entity as the image source field above, select it here. If it's found in a related entity via the term below, leave this blank."),
'#options' => $field_options,
'#required' => FALSE,
];

$form['structured_text_term'] = [
'#type' => 'entity_autocomplete',
'#target_type' => 'taxonomy_term',
'#title' => $this->t('Structured OCR text term'),
'#default_value' => $this->getStructuredTextTerm(),
'#required' => FALSE,
'#description' => $this->t('Term indicating the media that holds structured text, such as hOCR, for the given object. Use this if the text is on a separate media from the tile source.'),
];

$form['search_endpoint'] = [
'#type' => 'textfield',
'#title' => $this->t("Search endpoint path."),
'#description' => $this->t("If there is a search endpoint to search within the book that returns IIIF annotations, put it here. Use %node substitution where needed.<br>E.g., paged-content-search/%node"),
'#default_value' => $this->options['search_endpoint'],
'#required' => FALSE,
];
}

/**
Expand All @@ -520,4 +596,41 @@ public function getFormats() {
return ['json' => 'json'];
}

/**
* Submit handler for options form.
*
* Used to store the structured text media term by URL instead of Ttid.
*
* @param array $form
* The form.
* @param \Drupal\Core\Form\FormStateInterface $form_state
* The form state object.
*/
// @codingStandardsIgnoreStart
public function submitOptionsForm(&$form, FormStateInterface $form_state) {
// @codingStandardsIgnoreEnd
$style_options = $form_state->getValue('style_options');
$tid = $style_options['structured_text_term'];
unset($style_options['structured_text_term']);
$term = $this->entityTypeManager->getStorage('taxonomy_term')->load($tid);
$style_options['structured_text_term_uri'] = $this->utils->getUriForTerm($term);
$form_state->setValue('style_options', $style_options);
parent::submitOptionsForm($form, $form_state);
}

/**
* Get the structured text term.
*
* @return \Drupal\taxonomy\TermInterface|null
* The term if it could be found; otherwise, NULL.
*/
protected function getStructuredTextTerm() : ?TermInterface {
if (!$this->structuredTextTermMemoized) {
$this->structuredTextTermMemoized = TRUE;
$this->structuredTextTerm = $this->utils->getTermForUri($this->options['structured_text_term_uri']);
}

return $this->structuredTextTerm;
}

}

0 comments on commit 9b26616

Please sign in to comment.