From ed5869b43c0e8158462c41df5425a0287ce8c1a6 Mon Sep 17 00:00:00 2001 From: LaRita Robinson Date: Mon, 20 Nov 2023 10:35:07 -0500 Subject: [PATCH 1/5] Service to split previously ingested pdf This creates a new service which combines: - IiifPrint::SplitPdfs::DestroyPdfChildWorksService - IiifPrint::Jobs::ChildWorksFromPdfJob Beginning with a PDF fileset, it removes any existing child works (found by either the fileset ID or the title), any pending relationship table entries (found by parent id and fileset ID), and then submits a new job to do the pdf splitting. Note that pending relationship entries will not be removed if they don't have the file_id (which is the id of the fileset that spawned them). This is to avoid removing pending relationships that could still be needed for another fileset on the parent work. --- .../split_pdfs/pdf_child_works_service.rb | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 lib/iiif_print/split_pdfs/pdf_child_works_service.rb diff --git a/lib/iiif_print/split_pdfs/pdf_child_works_service.rb b/lib/iiif_print/split_pdfs/pdf_child_works_service.rb new file mode 100644 index 00000000..2cb528ed --- /dev/null +++ b/lib/iiif_print/split_pdfs/pdf_child_works_service.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module IiifPrint + module SplitPdfs + ## Encapsulates logic for cleanup when the PDF is destroyed after pdf splitting into child works + class PdfChildWorksService + def self.create_pdf_child_works_for(file_set:, user:) + locations = pdfs_only_for([Hyrax::WorkingDirectory.find_or_retrieve(file.id, file_set.id)]) + return if locations.empty? 
+ work = file_set.parent + + # clean up any existing spawned child works of this file_set + IiifPrint::SplitPdfs::DestroyPdfChildWorksService.conditionally_destroy_spawned_children_of( + file_set: file_set, + work: work + ) + + # submit a job to split pdf into child works + work.iiif_print_config.pdf_splitter_job.perform_later( + file_set, + locations, + user, + work.admin_set_id, + 0 # A no longer used parameter; but we need to preserve the method signature (for now) + ) + end + + # @todo: can we use mimetype instead? + def self.pdfs_only_for(paths) + paths.select { |path| path.end_with?('.pdf', '.PDF') } + end + end + end +end \ No newline at end of file From 16fafe41b0161446983072c038c60101d6a16a0a Mon Sep 17 00:00:00 2001 From: LaRita Robinson Date: Wed, 22 Nov 2023 12:59:48 -0500 Subject: [PATCH 2/5] Address feedback re: split_for --- lib/iiif_print/split_pdfs/pdf_child_works_service.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iiif_print/split_pdfs/pdf_child_works_service.rb b/lib/iiif_print/split_pdfs/pdf_child_works_service.rb index 2cb528ed..54574a4e 100644 --- a/lib/iiif_print/split_pdfs/pdf_child_works_service.rb +++ b/lib/iiif_print/split_pdfs/pdf_child_works_service.rb @@ -7,7 +7,7 @@ class PdfChildWorksService def self.create_pdf_child_works_for(file_set:, user:) locations = pdfs_only_for([Hyrax::WorkingDirectory.find_or_retrieve(file.id, file_set.id)]) return if locations.empty? 
- work = file_set.parent + work = IiifPrint.parent_for(file_set) # clean up any existing spawned child works of this file_set IiifPrint::SplitPdfs::DestroyPdfChildWorksService.conditionally_destroy_spawned_children_of( From b9aa515e0bebac5b231f693528a7e80b6d91893b Mon Sep 17 00:00:00 2001 From: LaRita Robinson Date: Mon, 20 Nov 2023 10:35:07 -0500 Subject: [PATCH 3/5] Service to split previously ingested pdf This creates a new service which combines: - IiifPrint::SplitPdfs::DestroyPdfChildWorksService - IiifPrint::Jobs::ChildWorksFromPdfJob Beginning with a PDF fileset, it removes any existing child works (found by either the fileset ID or the title), any pending relationship table entries (found by parent id and fileset ID), and then submits a new job to do the pdf splitting. Note that pending relationship entries will not be removed if they don't have the file_id (which is the id of the fileset that spawned them). This is to avoid removing pending relationships that could still be needed for another fileset on the parent work. 
--- lib/iiif_print/split_pdfs/pdf_child_works_service.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iiif_print/split_pdfs/pdf_child_works_service.rb b/lib/iiif_print/split_pdfs/pdf_child_works_service.rb index 54574a4e..347cc03e 100644 --- a/lib/iiif_print/split_pdfs/pdf_child_works_service.rb +++ b/lib/iiif_print/split_pdfs/pdf_child_works_service.rb @@ -31,4 +31,4 @@ def self.pdfs_only_for(paths) end end end -end \ No newline at end of file +end From dc732a0a627a69afc74029114e339ac2d3d0c075 Mon Sep 17 00:00:00 2001 From: Jeremy Friesen Date: Mon, 27 Nov 2023 14:33:34 -0500 Subject: [PATCH 4/5] =?UTF-8?q?=F0=9F=8E=81=20Add=20action=20to=20re-split?= =?UTF-8?q?=20PDF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For applications that have already installed IiifPrint, you'll need to manually add the following to your `config/routes.rb` file: ```ruby mount IiifPrint::Engine, at: '/' ``` Related to: - https://github.com/scientist-softserv/iiif_print/pull/292 - https://github.com/scientist-softserv/iiif_print/issues/294 Co-authored-by: LaRita Robinson --- .../iiif_print/split_pdfs_controller.rb | 38 +++++++++++++++++++ .../hyrax/file_sets/_show_actions.html.erb | 24 ++++++++++++ config/locales/iiif_print.en.yml | 4 ++ config/routes.rb | 3 ++ .../iiif_print/install_generator.rb | 8 ++++ lib/iiif_print.rb | 1 + lib/iiif_print/errors.rb | 9 +++++ lib/iiif_print/jobs/request_split_pdf_job.rb | 38 +++++++++++++++++++ .../split_pdfs/pdf_child_works_service.rb | 34 ----------------- 9 files changed, 125 insertions(+), 34 deletions(-) create mode 100644 app/controllers/iiif_print/split_pdfs_controller.rb create mode 100644 app/views/hyrax/file_sets/_show_actions.html.erb create mode 100644 config/routes.rb create mode 100644 lib/iiif_print/jobs/request_split_pdf_job.rb delete mode 100644 lib/iiif_print/split_pdfs/pdf_child_works_service.rb diff --git a/app/controllers/iiif_print/split_pdfs_controller.rb 
b/app/controllers/iiif_print/split_pdfs_controller.rb new file mode 100644 index 00000000..531b2b06 --- /dev/null +++ b/app/controllers/iiif_print/split_pdfs_controller.rb @@ -0,0 +1,38 @@ +module IiifPrint + # Responsible for coordinating the request to resplit a PDF. + class SplitPdfsController < ApplicationController + before_action :authenticate_user! + + def create + @file_set = FileSet.where(id: params[:file_set_id]).first + authorize_create_split_request!(@file_set) + IiifPrint::Jobs::RequestSplitPdfJob.perform_later(file_set: @file_set, user: current_user) + respond_to do |wants| + wants.html { redirect_to polymorphic_path([main_app, @file_set]), notice: t("iiif_print.file_set.split_submitted", id: @file_set.id) } + wants.json { render json: { id: @file_set.id, to_param: @file_set.to_param }, status: :ok } + end + end + + private + + ## + # @param file_set [FileSet] + def authorize_create_split_request!(file_set) + # NOTE: Duplicates logic of Hyrax: https://github.com/samvera/hyrax/blob/b334e186e77691d7da8ed59ff27f091be1c2a700/app/controllers/hyrax/file_sets_controller.rb#L234-L241 + # + # Namely if we don't have a file_set we need not proceed. + raise CanCan::AccessDenied unless file_set + + ## + # Rely on CanCan's authorize! method. We could add the :split_pdf action to the ability + # class. But we're piggybacking on the idea that you can do this if you can edit the work. + authorize!(:edit, file_set) + raise "Expected #{file_set.class} ID=#{file_set.id} #to_param=#{file_set.to_param} to be a PDF. Instead found mime_type of #{file_set.mime_type}." unless file_set.pdf? 
+ + work = IiifPrint.parent_for(file_set) + raise WorkNotConfiguredToSplitFileSetError.new(file_set: file_set, work: work) unless work&.iiif_print_config&.pdf_splitter_job&.presence + + true + end + end +end diff --git a/app/views/hyrax/file_sets/_show_actions.html.erb b/app/views/hyrax/file_sets/_show_actions.html.erb new file mode 100644 index 00000000..d662de93 --- /dev/null +++ b/app/views/hyrax/file_sets/_show_actions.html.erb @@ -0,0 +1,24 @@ +
+ <% if Hyrax.config.analytics? %> + <% # turbolinks needs to be turned off or the page will use the cache and the %> + <% # analytics graph will not show unless the page is refreshed. %> + <%= link_to t('.analytics'), @presenter.stats_path, id: 'stats', class: 'btn btn-default', data: { turbolinks: false } %> + <% end %> + + <%# Hyrax 2.9.6 does not respond to workflow_restriction; that is something added in later versions. %> + <% if @presenter.editor? && (!respond_to?(:workflow_restriction?) || !workflow_restriction?(@presenter)) %> + <%= link_to t(".edit_this", type: @presenter.human_readable_type), edit_polymorphic_path([main_app, @presenter]), + class: 'btn btn-default' %> + <%= link_to t(".delete_this", type: @presenter.human_readable_type), [main_app, @presenter], + class: 'btn btn-danger', data: { confirm: t(".confirm_delete_this", type: @presenter.human_readable_type) }, + method: :delete %> + <% end %> + + <% if @presenter.editor? && @presenter.pdf? %> + <%= link_to t("iiif_print.file_set.split_this"), iiif_print.split_pdf_path(@presenter), + class: 'btn btn-default', data: { confirm: t("iiif_print.file_set.confirm_split_this") }, + method: :post %> + <% end %> + + <%= render 'social_media' %> +
diff --git a/config/locales/iiif_print.en.yml b/config/locales/iiif_print.en.yml index 5eab00ea..aa3a8361 100644 --- a/config/locales/iiif_print.en.yml +++ b/config/locales/iiif_print.en.yml @@ -52,6 +52,10 @@ en: label: 'Place of publication' publication_title: label: 'Publication' + file_set: + split_this: 'Re-Split PDF' + confirm_split_this: 'Re-Split PDF' + split_submitted: 'Submitted PDF re-splitting job for FileSet ID=%{id}' newspapers_search: title: 'Search Newspapers' text: 'Use this form to search for full-text newspaper content.' diff --git a/config/routes.rb b/config/routes.rb new file mode 100644 index 00000000..8d6d55e4 --- /dev/null +++ b/config/routes.rb @@ -0,0 +1,3 @@ +IiifPrint::Engine.routes.draw do + post "split_pdfs/:file_set_id" => "split_pdfs#create", as: :split_pdf +end diff --git a/lib/generators/iiif_print/install_generator.rb b/lib/generators/iiif_print/install_generator.rb index 99b6453b..20473075 100644 --- a/lib/generators/iiif_print/install_generator.rb +++ b/lib/generators/iiif_print/install_generator.rb @@ -22,6 +22,14 @@ def catalog_controller_configuration generate 'iiif_print:catalog_controller' end + def install_routes + return if IO.read('config/routes.rb').include?('mount IiifPrint::Engine') + + inject_into_file 'config/routes.rb', after: /mount Hyrax::Engine\s*\n/ do + " mount IiifPrint::Engine, at: '/'\n"\ + end + end + def inject_configuration copy_file 'config/initializers/iiif_print.rb' end diff --git a/lib/iiif_print.rb b/lib/iiif_print.rb index af2fb2e9..595cb5fe 100644 --- a/lib/iiif_print.rb +++ b/lib/iiif_print.rb @@ -17,6 +17,7 @@ require "iiif_print/jobs/application_job" require "iiif_print/blacklight_iiif_search/annotation_decorator" require "iiif_print/jobs/child_works_from_pdf_job" +require "iiif_print/jobs/request_split_pdf_job" require "iiif_print/split_pdfs/base_splitter" require "iiif_print/split_pdfs/child_work_creation_from_pdf_service" require "iiif_print/split_pdfs/derivative_rodeo_splitter" diff --git 
a/lib/iiif_print/errors.rb b/lib/iiif_print/errors.rb index 28727031..35cc1467 100644 --- a/lib/iiif_print/errors.rb +++ b/lib/iiif_print/errors.rb @@ -9,4 +9,13 @@ class DataError < IiifPrintError class MissingFileError < IiifPrintError end + + class WorkNotConfiguredToSplitFileSetError < IiifPrintError + def initialize(file_set:, work:) + message = "Expected that we would be splitting #{file_set.class} ID=#{file_set&.id} #to_param=#{file_set&.to_param} " \ + "for work #{work.class} ID=#{work&.id} #to_param=#{work&.to_param}. " \ + "However it was not configured for PDF splitting." + super(message) + end + end end diff --git a/lib/iiif_print/jobs/request_split_pdf_job.rb b/lib/iiif_print/jobs/request_split_pdf_job.rb new file mode 100644 index 00000000..f1076c5f --- /dev/null +++ b/lib/iiif_print/jobs/request_split_pdf_job.rb @@ -0,0 +1,38 @@ +module IiifPrint + module Jobs + ## + # Encapsulates logic for cleanup when the PDF is destroyed after pdf splitting into child works + class RequestSplitPdfJob < IiifPrint::Jobs::ApplicationJob + ## + # @param file_set [FileSet] + # @param user [User] + # rubocop:disable Metrics/MethodLength + def perform(file_set:, user:) + return true if file_set.pdf? + + work = IiifPrint.parent_for(file_set) + + # Woe is ye who changes the configuration of the model, thus removing the splitting. 
+ raise WorkNotConfiguredToSplitFileSetError.new(work: work, file_set: file_set) unless work&.iiif_print_config&.pdf_splitter_job&.presence + + # clean up any existing spawned child works of this file_set + IiifPrint::SplitPdfs::DestroyPdfChildWorksService.conditionally_destroy_spawned_children_of( + file_set: file_set, + work: work + ) + + location = Hyrax::WorkingDirectory.find_or_retrieve(file_set.files.first.id, file_set.id) + + # submit a job to split pdf into child works + work.iiif_print_config.pdf_splitter_job.perform_later( + file_set, + [location], + user, + work.admin_set_id, + 0 # A no longer used parameter; but we need to preserve the method signature (for now) + ) + end + # rubocop:enable Metrics/MethodLength + end + end +end diff --git a/lib/iiif_print/split_pdfs/pdf_child_works_service.rb b/lib/iiif_print/split_pdfs/pdf_child_works_service.rb deleted file mode 100644 index 347cc03e..00000000 --- a/lib/iiif_print/split_pdfs/pdf_child_works_service.rb +++ /dev/null @@ -1,34 +0,0 @@ -# frozen_string_literal: true - -module IiifPrint - module SplitPdfs - ## Encapsulates logic for cleanup when the PDF is destroyed after pdf splitting into child works - class PdfChildWorksService - def self.create_pdf_child_works_for(file_set:, user:) - locations = pdfs_only_for([Hyrax::WorkingDirectory.find_or_retrieve(file.id, file_set.id)]) - return if locations.empty? - work = IiifPrint.parent_for(file_set) - - # clean up any existing spawned child works of this file_set - IiifPrint::SplitPdfs::DestroyPdfChildWorksService.conditionally_destroy_spawned_children_of( - file_set: file_set, - work: work - ) - - # submit a job to split pdf into child works - work.iiif_print_config.pdf_splitter_job.perform_later( - file_set, - locations, - user, - work.admin_set_id, - 0 # A no longer used parameter; but we need to preserve the method signature (for now) - ) - end - - # @todo: can we use mimetype instead? 
- def self.pdfs_only_for(paths) - paths.select { |path| path.end_with?('.pdf', '.PDF') } - end - end - end -end From cd5102eac5965221f61ceaf5af252fd8fe907c0d Mon Sep 17 00:00:00 2001 From: Jeremy Friesen Date: Mon, 27 Nov 2023 17:29:34 -0500 Subject: [PATCH 5/5] =?UTF-8?q?=F0=9F=90=9B=20Fix=20logic=20bug=20regardin?= =?UTF-8?q?g=20splitting=20job?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kirk Wang --- lib/iiif_print/jobs/request_split_pdf_job.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iiif_print/jobs/request_split_pdf_job.rb b/lib/iiif_print/jobs/request_split_pdf_job.rb index f1076c5f..e11ad4a0 100644 --- a/lib/iiif_print/jobs/request_split_pdf_job.rb +++ b/lib/iiif_print/jobs/request_split_pdf_job.rb @@ -8,7 +8,7 @@ class RequestSplitPdfJob < IiifPrint::Jobs::ApplicationJob # @param user [User] # rubocop:disable Metrics/MethodLength def perform(file_set:, user:) - return true if file_set.pdf? + return true unless file_set.pdf? work = IiifPrint.parent_for(file_set)