From cff0d69d3202f9ee301eda35362a3cbdcb4f246f Mon Sep 17 00:00:00 2001 From: Shana Moore Date: Fri, 10 Nov 2023 08:33:36 -0800 Subject: [PATCH 01/14] add parent_model and child_model to iiif_print_pending_relationships --- app/models/iiif_print/pending_relationship.rb | 3 ++- ...add_model_details_to_iiif_print_pending_relationships.rb | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb diff --git a/app/models/iiif_print/pending_relationship.rb b/app/models/iiif_print/pending_relationship.rb index 8c7fc990..f29b7e0c 100644 --- a/app/models/iiif_print/pending_relationship.rb +++ b/app/models/iiif_print/pending_relationship.rb @@ -3,5 +3,6 @@ class PendingRelationship < ApplicationRecord validates :parent_id, presence: true validates :child_title, presence: true validates :child_order, presence: true - end + validates :parent_model, presence: true + validates :child_model, presence: true end diff --git a/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb b/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb new file mode 100644 index 00000000..2c90bf10 --- /dev/null +++ b/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb @@ -0,0 +1,6 @@ +class AddModelDetailsToIiifPrintPendingRelationships < ActiveRecord::Migration[5.2] + def change + add_column :iiif_print_pending_relationships, :parent_model, :string + add_column :iiif_print_pending_relationships, :child_model, :string + end +end From 1c9daacba799bea3b125acf39c44bcfda9038b4d Mon Sep 17 00:00:00 2001 From: Shana Moore Date: Fri, 10 Nov 2023 11:26:28 -0800 Subject: [PATCH 02/14] :gift: pass parent and child model to PendingRelationship initialization --- app/models/iiif_print/pending_relationship.rb | 1 + lib/iiif_print/jobs/child_works_from_pdf_job.rb | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/app/models/iiif_print/pending_relationship.rb b/app/models/iiif_print/pending_relationship.rb index f29b7e0c..dee6b60f 100644 --- a/app/models/iiif_print/pending_relationship.rb +++ b/app/models/iiif_print/pending_relationship.rb @@ -5,4 +5,5 @@ class PendingRelationship < ApplicationRecord validates :child_order, presence: true validates :parent_model, presence: true validates :child_model, presence: true + end end diff --git a/lib/iiif_print/jobs/child_works_from_pdf_job.rb b/lib/iiif_print/jobs/child_works_from_pdf_job.rb index 57574329..c82b52d1 100644 --- a/lib/iiif_print/jobs/child_works_from_pdf_job.rb +++ b/lib/iiif_print/jobs/child_works_from_pdf_job.rb @@ -96,7 +96,9 @@ def prepare_import_data(original_pdf_path, image_files, user) # save child work info to create the member relationships PendingRelationship.create!(child_title: child_title, parent_id: @parent_work.id, - child_order: child_title) + child_order: child_title, + parent_model: @parent_work.class, + child_model: @parent_work.iiif_print_config.pdf_split_child_model) begin # Clean up the temporary image path. From eed67953892c99f6b26d42a5314d5e0628215c20 Mon Sep 17 00:00:00 2001 From: Shana Moore Date: Fri, 10 Nov 2023 15:17:57 -0800 Subject: [PATCH 03/14] :gift: add file_id to pending_relationships table --- ...52_add_model_details_to_iiif_print_pending_relationships.rb | 1 + lib/iiif_print/jobs/child_works_from_pdf_job.rb | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb b/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb index 2c90bf10..123ef824 100644 --- a/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +++ b/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb @@ -2,5 +2,6 @@ class AddModelDetailsToIiifPrintPendingRelationships < ActiveRecord::Migration[5 def change add_column :iiif_print_pending_relationships, :parent_model, :string add_column :iiif_print_pending_relationships, :child_model, :string + add_column :iiif_print_pending_relationships, :file_id, :string end end diff --git a/lib/iiif_print/jobs/child_works_from_pdf_job.rb b/lib/iiif_print/jobs/child_works_from_pdf_job.rb index c82b52d1..f8acb11b 100644 --- a/lib/iiif_print/jobs/child_works_from_pdf_job.rb +++ b/lib/iiif_print/jobs/child_works_from_pdf_job.rb @@ -98,7 +98,8 @@ def prepare_import_data(original_pdf_path, image_files, user) parent_id: @parent_work.id, child_order: child_title, parent_model: @parent_work.class, - child_model: @parent_work.iiif_print_config.pdf_split_child_model) + child_model: @parent_work.iiif_print_config.pdf_split_child_model, + file_id: file_id) begin # Clean up the temporary image path. From 904477b6052cc05f10a1dc6bc994f84cc6e41e63 Mon Sep 17 00:00:00 2001 From: LaRita Robinson Date: Mon, 13 Nov 2023 12:12:03 -0500 Subject: [PATCH 04/14] Require file_id Also some lint fixes. --- app/models/iiif_print/pending_relationship.rb | 1 + lib/iiif_print/jobs/child_works_from_pdf_job.rb | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/app/models/iiif_print/pending_relationship.rb b/app/models/iiif_print/pending_relationship.rb index dee6b60f..496e617e 100644 --- a/app/models/iiif_print/pending_relationship.rb +++ b/app/models/iiif_print/pending_relationship.rb @@ -5,5 +5,6 @@ class PendingRelationship < ApplicationRecord validates :child_order, presence: true validates :parent_model, presence: true validates :child_model, presence: true + validates :file_id, presence: true end end diff --git a/lib/iiif_print/jobs/child_works_from_pdf_job.rb b/lib/iiif_print/jobs/child_works_from_pdf_job.rb index f8acb11b..78bde08a 100644 --- a/lib/iiif_print/jobs/child_works_from_pdf_job.rb +++ b/lib/iiif_print/jobs/child_works_from_pdf_job.rb @@ -96,9 +96,9 @@ def prepare_import_data(original_pdf_path, image_files, user) # save child work info to create the member relationships PendingRelationship.create!(child_title: child_title, parent_id: @parent_work.id, - child_order: child_title, + child_order: child_title, parent_model: @parent_work.class, - child_model: @parent_work.iiif_print_config.pdf_split_child_model, + child_model: @parent_work.iiif_print_config.pdf_split_child_model, file_id: file_id) begin From 458053c424b71326f9f458a4e2adc99a5561826c Mon Sep 17 00:00:00 2001 From: LaRita Robinson Date: Mon, 13 Nov 2023 13:57:14 -0500 Subject: [PATCH 05/14] Add PendingRelationships attributes to spec --- spec/iiif_print/jobs/create_relationships_job_spec.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/spec/iiif_print/jobs/create_relationships_job_spec.rb b/spec/iiif_print/jobs/create_relationships_job_spec.rb index 707fd39b..9572af9a 100644 --- a/spec/iiif_print/jobs/create_relationships_job_spec.rb +++ b/spec/iiif_print/jobs/create_relationships_job_spec.rb @@ -9,11 +9,12 @@ module IiifPrint::Jobs let(:parent_model) { WorkWithIiifPrintConfig.to_s } let(:child_model) { WorkWithIiifPrintConfig.to_s } + let(:file) { FileSet.new.tap { |fs| fs.save!(validate: false) } } let(:parent_record) { WorkWithIiifPrintConfig.new(title: ['required title']) } let(:child_record1) { WorkWithIiifPrintConfig.new(title: ["Child of #{parent_record.id} page 01"]) } let(:child_record2) { WorkWithIiifPrintConfig.new(title: ["Child of #{parent_record.id} page 02"]) } - let(:pending_rel1) { IiifPrint::PendingRelationship.new(parent_id: parent_record.id, child_title: "Child of #{parent_record.id} page 01", child_order: "Child of #{parent_record.id} page 01") } - let(:pending_rel2) { IiifPrint::PendingRelationship.new(parent_id: parent_record.id, child_title: "Child of #{parent_record.id} page 02", child_order: "Child of #{parent_record.id} page 02") } + let(:pending_rel1) { IiifPrint::PendingRelationship.new(parent_id: parent_record.id, child_title: "Child of #{parent_record.id} page 01", child_order: "Child of #{parent_record.id} page 01", parent_model: parent_model, child_model: child_model, file_id: file.id) } + let(:pending_rel2) { IiifPrint::PendingRelationship.new(parent_id: parent_record.id, child_title: "Child of #{parent_record.id} page 02", child_order: "Child of #{parent_record.id} page 02", parent_model: parent_model, child_model: child_model, file_id: file.id) } describe '#perform' do before do From 890c76b4c364fbe789b7537acc4e0a038df047b0 Mon Sep 17 00:00:00 2001 From: LaRita Robinson Date: Mon, 13 Nov 2023 14:58:00 -0500 Subject: [PATCH 06/14] Add split_from_pdf_id term --- app/indexers/concerns/iiif_print/child_indexer.rb | 1 + app/models/concerns/iiif_print/set_child_flag.rb | 8 ++++++++ lib/iiif_print/jobs/child_works_from_pdf_job.rb | 5 +++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/app/indexers/concerns/iiif_print/child_indexer.rb b/app/indexers/concerns/iiif_print/child_indexer.rb index 4174cc4a..47b40327 100644 --- a/app/indexers/concerns/iiif_print/child_indexer.rb +++ b/app/indexers/concerns/iiif_print/child_indexer.rb @@ -26,6 +26,7 @@ def self.decorate_work_types! def generate_solr_document super.tap do |solr_doc| solr_doc['is_child_bsi'] ||= object.is_child + solr_doc['split_from_pdf_id_tsi'] ||= object.split_from_pdf_id solr_doc['is_page_of_ssim'] = iiif_print_lineage_service.ancestor_ids_for(object) # Due to a long-standing hack in Hyrax, the file_set_ids_ssim contains both file_set_ids and diff --git a/app/models/concerns/iiif_print/set_child_flag.rb b/app/models/concerns/iiif_print/set_child_flag.rb index 9294d2e5..69e43d43 100644 --- a/app/models/concerns/iiif_print/set_child_flag.rb +++ b/app/models/concerns/iiif_print/set_child_flag.rb @@ -4,6 +4,9 @@ module RDF class CustomIsChildTerm < Vocabulary('http://id.loc.gov/vocabulary/identifiers/') property 'is_child' end + class SplitFromPdfIdTerm < Vocabulary('http://id.loc.gov/vocabulary/identifiers/') + property 'split_from_pdf_id' + end end module IiifPrint @@ -18,6 +21,11 @@ module SetChildFlag multiple: false do |index| index.as :stored_searchable end + property :split_from_pdf_id, + predicate: ::RDF::SplitFromPdfIdTerm.split_from_pdf_id, + multiple: false do |index| + index.as :stored_searchable + end end def set_children diff --git a/lib/iiif_print/jobs/child_works_from_pdf_job.rb b/lib/iiif_print/jobs/child_works_from_pdf_job.rb index 78bde08a..f4911aa8 100644 --- a/lib/iiif_print/jobs/child_works_from_pdf_job.rb +++ b/lib/iiif_print/jobs/child_works_from_pdf_job.rb @@ -53,6 +53,7 @@ def split_pdf(original_pdf_path, user, child_model, pdf_file_set) image_files = @parent_work.iiif_print_config.pdf_splitter_service.call(original_pdf_path, file_set: pdf_file_set) return if image_files.blank? + @split_from_pdf_id = pdf_file_set.nil? ? nil : pdf_file_set.id prepare_import_data(original_pdf_path, image_files, user) # submit the job to create all the child works for one PDF @@ -70,7 +71,7 @@ def split_pdf(original_pdf_path, user, child_model, pdf_file_set) @child_work_titles, {}, @uploaded_files, - attributes.merge!(model: child_model.to_s).with_indifferent_access, + attributes.merge!(model: child_model.to_s, split_from_pdf_id: @split_from_pdf_id).with_indifferent_access, operation) end # rubocop:enable Metrics/ParameterLists @@ -99,7 +100,7 @@ def prepare_import_data(original_pdf_path, image_files, user) child_order: child_title, parent_model: @parent_work.class, child_model: @parent_work.iiif_print_config.pdf_split_child_model, - file_id: file_id) + file_id: @split_from_pdf_id) begin # Clean up the temporary image path. From be326d5a4945f856379a19ae8e9b8e340d3051da Mon Sep 17 00:00:00 2001 From: LaRita Robinson Date: Mon, 13 Nov 2023 14:59:02 -0500 Subject: [PATCH 07/14] Remove PendingRelationships entries After creating relationships remove the PendingRelationships entries, as they are no longer needed, even if we have more relationships than expected. Cleanup may be needed so still log the error to report the excess relationships. --- lib/iiif_print/jobs/create_relationships_job.rb | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/iiif_print/jobs/create_relationships_job.rb b/lib/iiif_print/jobs/create_relationships_job.rb index edd35b97..a1f876d2 100644 --- a/lib/iiif_print/jobs/create_relationships_job.rb +++ b/lib/iiif_print/jobs/create_relationships_job.rb @@ -29,7 +29,14 @@ def perform(parent_id:, parent_model:, child_model:, retries: 0, **) if @number_of_failures.zero? && @number_of_successes == @pending_children.count # remove pending relationships upon valid completion @pending_children.each(&:destroy) + elsif @number_of_failures.zero? && @number_of_successes > @pending_children.count + # remove pending relationships but raise error that too many relationships formed + @pending_children.each(&:destroy) + raise "CreateRelationshipsJob for parent id: #{@parent_id} " \ + "added #{@number_of_successes} children, " \ + "expected #{@pending_children} children." else + # report failures & keep pending relationships raise "CreateRelationshipsJob failed for parent id: #{@parent_id} " \ "had #{@number_of_successes} successes & #{@number_of_failures} failures, " \ "with errors: #{@errors}. Wanted #{@pending_children} children." From 6c7f1ee0d6f57ab15702e2759545f419bcb165dd Mon Sep 17 00:00:00 2001 From: LaRita Robinson Date: Mon, 13 Nov 2023 17:28:54 -0500 Subject: [PATCH 08/14] Add split_from_pdf_id term to solr document --- app/models/concerns/iiif_print/solr/document.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/app/models/concerns/iiif_print/solr/document.rb b/app/models/concerns/iiif_print/solr/document.rb index 489d9366..7d27223d 100644 --- a/app/models/concerns/iiif_print/solr/document.rb +++ b/app/models/concerns/iiif_print/solr/document.rb @@ -19,6 +19,7 @@ module IiifPrint::Solr::Document def self.decorate(base) base.prepend(self) base.send(:attribute, :is_child, Hyrax::SolrDocument::Metadata::Solr::String, 'is_child_bsi') + base.send(:attribute, :split_from_pdf_id, Hyrax::SolrDocument::Metadata::Solr::String, 'split_from_pdf_id_tsi') base.send(:attribute, :digest, Hyrax::SolrDocument::Metadata::Solr::String, 'digest_ssim') # @note These properties came from the newspaper_works gem. They are configurable. From 3ded9fbf3de8bb54a3c857cb0827278f6e279e88 Mon Sep 17 00:00:00 2001 From: LaRita Robinson Date: Mon, 13 Nov 2023 20:23:57 -0500 Subject: [PATCH 09/14] Shorten class to appease Rubocop --- app/models/concerns/iiif_print/set_child_flag.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/models/concerns/iiif_print/set_child_flag.rb b/app/models/concerns/iiif_print/set_child_flag.rb index 69e43d43..6e40c533 100644 --- a/app/models/concerns/iiif_print/set_child_flag.rb +++ b/app/models/concerns/iiif_print/set_child_flag.rb @@ -4,7 +4,7 @@ module RDF class CustomIsChildTerm < Vocabulary('http://id.loc.gov/vocabulary/identifiers/') property 'is_child' end - class SplitFromPdfIdTerm < Vocabulary('http://id.loc.gov/vocabulary/identifiers/') + class FromPdfIdTerm < Vocabulary('http://id.loc.gov/vocabulary/identifiers/') property 'split_from_pdf_id' end end @@ -22,7 +22,7 @@ module SetChildFlag index.as :stored_searchable end property :split_from_pdf_id, - predicate: ::RDF::SplitFromPdfIdTerm.split_from_pdf_id, + predicate: ::RDF::FromPdfIdTerm.split_from_pdf_id, multiple: false do |index| index.as :stored_searchable end From a686bc5d20fc4a3a695dfb49134b2e0330823e37 Mon Sep 17 00:00:00 2001 From: LaRita Robinson Date: Mon, 13 Nov 2023 22:34:39 -0500 Subject: [PATCH 10/14] Create service to destroy child works of PDFs When a fileset is destroyed containing a PDF which was used to split into child works, this service performs the necessary cleanup. --- .../actors/file_set_actor_decorator.rb | 13 ++++++- lib/iiif_print.rb | 1 + .../destroy_pdf_child_works_service.rb | 34 +++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb diff --git a/app/actors/iiif_print/actors/file_set_actor_decorator.rb b/app/actors/iiif_print/actors/file_set_actor_decorator.rb index 81b61271..81d44f52 100644 --- a/app/actors/iiif_print/actors/file_set_actor_decorator.rb +++ b/app/actors/iiif_print/actors/file_set_actor_decorator.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -# override to add PDF splitting for file sets +# override to add PDF splitting for file sets and remove splitting upon fileset delete module IiifPrint module Actors module FileSetActorDecorator @@ -33,6 +33,17 @@ def attach_to_work(work, file_set_params = {}) def service IiifPrint::SplitPdfs::ChildWorkCreationFromPdfService end + + # Clean up children when removing the fileset + def destroy + # we destroy the children before the file_set, because we need the parent relationship + IiifPrint::SplitPdfs::DestroyPdfChildWorksService.conditionally_destroy_spawned_children_of( + file_set: file_set, + work: file_set.parent + ) + # and now back to your regularly scheduled programming + super + end end end end diff --git a/lib/iiif_print.rb b/lib/iiif_print.rb index 006a35a8..e7f2dfaa 100644 --- a/lib/iiif_print.rb +++ b/lib/iiif_print.rb @@ -20,6 +20,7 @@ require "iiif_print/split_pdfs/base_splitter" require "iiif_print/split_pdfs/child_work_creation_from_pdf_service" require "iiif_print/split_pdfs/derivative_rodeo_splitter" +require "iiif_print/split_pdfs/destroy_pdf_child_works_service" module IiifPrint extend ActiveSupport::Autoload diff --git a/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb b/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb new file mode 100644 index 00000000..cfae9bfa --- /dev/null +++ b/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module IiifPrint + module SplitPdfs + ## Encapsulates logic for cleanup when the PDF is destroyed after pdf splitting into child works + class DestroyPdfChildWorksService + ## @api public + # @param file_set [FileSet] What is the containing file set for the provided file. + # @param work [Hydra::PCDM::Work] Parent of the fileset being deleted + def self.conditionally_destroy_spawned_children_of(file_set:, work:) + child_model = work.try(:iiif_print_config)&.pdf_split_child_model + return unless child_model + return unless file_set.class.pdf_mime_types.include?(file_set.mime_type) + + IiifPrint::PendingRelationship.where(parent_id: work.id, file_id: file_set.id).each(&:destroy) + destroy_spawned_children(model: child_model, file_set: file_set, work: work) + true + end + + def self.destroy_spawned_children(model:, file_set:, work:) + # look first for children by the file set id they were split from + children = model.where(split_from_pdf_id: file_set.id) + if children.blank? + # find works where file name and work `to_param` are both in the title + children = model.where(title: file_set.label).where(title: work.to_param) + end + return if children.blank? + children.each do |rcd| + rcd.destroy(eradicate: true) + end + end + end + end +end From a23582db52a405dd17f2cf1cec8cdc3644825a93 Mon Sep 17 00:00:00 2001 From: LaRita Robinson Date: Tue, 14 Nov 2023 10:07:22 -0500 Subject: [PATCH 11/14] Appease rubocop --- .../actors/file_set_actor_decorator.rb | 4 ++-- .../concerns/iiif_print/set_child_flag.rb | 1 + .../jobs/create_relationships_job_spec.rb | 18 ++++++++++++++++-- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/app/actors/iiif_print/actors/file_set_actor_decorator.rb b/app/actors/iiif_print/actors/file_set_actor_decorator.rb index 81d44f52..fda63b65 100644 --- a/app/actors/iiif_print/actors/file_set_actor_decorator.rb +++ b/app/actors/iiif_print/actors/file_set_actor_decorator.rb @@ -38,9 +38,9 @@ def service def destroy # we destroy the children before the file_set, because we need the parent relationship IiifPrint::SplitPdfs::DestroyPdfChildWorksService.conditionally_destroy_spawned_children_of( - file_set: file_set, + file_set: file_set, work: file_set.parent - ) + ) # and now back to your regularly scheduled programming super end diff --git a/app/models/concerns/iiif_print/set_child_flag.rb b/app/models/concerns/iiif_print/set_child_flag.rb index 6e40c533..99966c2e 100644 --- a/app/models/concerns/iiif_print/set_child_flag.rb +++ b/app/models/concerns/iiif_print/set_child_flag.rb @@ -4,6 +4,7 @@ module RDF class CustomIsChildTerm < Vocabulary('http://id.loc.gov/vocabulary/identifiers/') property 'is_child' end + class FromPdfIdTerm < Vocabulary('http://id.loc.gov/vocabulary/identifiers/') property 'split_from_pdf_id' end diff --git a/spec/iiif_print/jobs/create_relationships_job_spec.rb b/spec/iiif_print/jobs/create_relationships_job_spec.rb index 9572af9a..ee54370f 100644 --- a/spec/iiif_print/jobs/create_relationships_job_spec.rb +++ b/spec/iiif_print/jobs/create_relationships_job_spec.rb @@ -13,8 +13,22 @@ module IiifPrint::Jobs let(:parent_record) { WorkWithIiifPrintConfig.new(title: ['required title']) } let(:child_record1) { WorkWithIiifPrintConfig.new(title: ["Child of #{parent_record.id} page 01"]) } let(:child_record2) { WorkWithIiifPrintConfig.new(title: ["Child of #{parent_record.id} page 02"]) } - let(:pending_rel1) { IiifPrint::PendingRelationship.new(parent_id: parent_record.id, child_title: "Child of #{parent_record.id} page 01", child_order: "Child of #{parent_record.id} page 01", parent_model: parent_model, child_model: child_model, file_id: file.id) } - let(:pending_rel2) { IiifPrint::PendingRelationship.new(parent_id: parent_record.id, child_title: "Child of #{parent_record.id} page 02", child_order: "Child of #{parent_record.id} page 02", parent_model: parent_model, child_model: child_model, file_id: file.id) } + let(:pending_rel1) { IiifPrint::PendingRelationship.new( + parent_id: parent_record.id, + child_title: "Child of #{parent_record.id} page 01", + child_order: "Child of #{parent_record.id} page 01", + parent_model: parent_model, + child_model: child_model, + file_id: file.id + ) } + let(:pending_rel2) { IiifPrint::PendingRelationship.new( + parent_id: parent_record.id, + child_title: "Child of #{parent_record.id} page 02", + child_order: "Child of #{parent_record.id} page 02", + parent_model: parent_model, + child_model: child_model, + file_id: file.id + ) } describe '#perform' do before do From ddbeab55802c353d8e21db802e541c4d9f3488eb Mon Sep 17 00:00:00 2001 From: LaRita Robinson Date: Tue, 14 Nov 2023 10:35:36 -0500 Subject: [PATCH 12/14] Delete spawned child works as a work is deleted This plugs into the cleanup file sets process, to add additional cleanup of child works which were created as part of a IiifPrint PDF split. Other child works are not affected. --- .../cleanup_file_sets_actor_decorator.rb | 24 +++++++++++++++++++ lib/iiif_print/engine.rb | 1 + 2 files changed, 25 insertions(+) create mode 100644 app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb diff --git a/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb b/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb new file mode 100644 index 00000000..b000a35a --- /dev/null +++ b/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +# override Hyrax to remove splitting upon work delete +module IiifPrint + module Actors + # Responsible for removing FileSets related to the given curation concern. + module CleanupFileSetsActorDecorator + # @param [Hyrax::Actors::Environment] env + # @return [Boolean] true if destroy was successful + def destroy(env) + file_sets = env.curation_concern.file_sets + file_sets.each do |file_set| + # we destroy the children before the file_set, because we need the parent relationship + IiifPrint::SplitPdfs::DestroyPdfChildWorksService.conditionally_destroy_spawned_children_of( + file_set: file_set, + work: env.curation_concern + ) + end + # and now back to your regularly scheduled programming + super + end + end + end +end diff --git a/lib/iiif_print/engine.rb b/lib/iiif_print/engine.rb index 1330f20a..b314e35f 100644 --- a/lib/iiif_print/engine.rb +++ b/lib/iiif_print/engine.rb @@ -52,6 +52,7 @@ class Engine < ::Rails::Engine ::BlacklightIiifSearch::IiifSearchAnnotation.prepend(IiifPrint::BlacklightIiifSearch::AnnotationDecorator) ::BlacklightIiifSearch::IiifSearch.prepend(IiifPrint::IiifSearchDecorator) Hyrax::Actors::FileSetActor.prepend(IiifPrint::Actors::FileSetActorDecorator) + Hyrax::Actors::CleanupFileSetsActor.prepend(IiifPrint::Actors::CleanupFileSetsActorDecorator) Hyrax.config do |config| config.callback.set(:after_create_fileset) do |file_set, user| From 1b85c3f3e6f798b838bd8243465dc95b9d10fcfb Mon Sep 17 00:00:00 2001 From: LaRita Robinson Date: Tue, 14 Nov 2023 15:59:17 -0500 Subject: [PATCH 13/14] Add spec for destroy_pdf_child_works_service --- .../destroy_pdf_child_works_service.rb | 3 +- .../destroy_pdf_child_works_service_spec.rb | 88 +++++++++++++++++++ 2 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb diff --git a/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb b/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb index cfae9bfa..8b98b55e 100644 --- a/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +++ b/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb @@ -14,10 +14,9 @@ def self.conditionally_destroy_spawned_children_of(file_set:, work:) IiifPrint::PendingRelationship.where(parent_id: work.id, file_id: file_set.id).each(&:destroy) destroy_spawned_children(model: child_model, file_set: file_set, work: work) - true end - def self.destroy_spawned_children(model:, file_set:, work:) + private_class_method def self.destroy_spawned_children(model:, file_set:, work:) # look first for children by the file set id they were split from children = model.where(split_from_pdf_id: file_set.id) if children.blank? diff --git a/spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb b/spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb new file mode 100644 index 00000000..22daa395 --- /dev/null +++ b/spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb @@ -0,0 +1,88 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe IiifPrint::SplitPdfs::DestroyPdfChildWorksService do + let(:subject) { described_class.conditionally_destroy_spawned_children_of(file_set: fileset, work: work) } + + let(:work) { WorkWithIiifPrintConfig.new(title: ['required title'], id: '123') } + let(:fileset) { FileSet.new.tap { |fs| fs.save!(validate: false) } } + let(:child_work) { WorkWithIiifPrintConfig.new(title: ["Child of #{work.id} file.pdf page 01"], id: '456', is_child: true ) } + let(:pending_rel1) { IiifPrint::PendingRelationship.new( + parent_id: work.id, + child_title: "Child of #{work.id} file.pdf page 01", + child_order: "Child of #{work.id} file.pdf page 01", + parent_model: WorkWithIiifPrintConfig, + child_model: WorkWithIiifPrintConfig, + file_id: fileset.id + ) } + let(:pending_rel2) { IiifPrint::PendingRelationship.new( + parent_id: work.id, + child_title: "Child of #{work.id} another.pdf page 01", + child_order: "Child of #{work.id} another.pdf page 01", + parent_model: WorkWithIiifPrintConfig, + child_model: WorkWithIiifPrintConfig, + file_id: 'another' + ) } + # let(:uploaded_pdf_file) { create(:uploaded_pdf_file) } + # let(:uploaded_file_ids) { [uploaded_pdf_file.id] } + + before do + allow(fileset).to receive(:parent).and_return(work) + allow(fileset).to receive(:label).and_return('file.pdf') + allow(fileset).to receive(:mime_type).and_return('application/pdf') + end + + describe 'class' do + subject { described_class } + + it { is_expected.to respond_to(:conditionally_destroy_spawned_children_of) } + it { is_expected.not_to respond_to(:destroy_spawned_children) } + end + + describe '#conditionally_destroy_spawned_children_of' do + context 'with child works by fileset id' do + before do + allow(WorkWithIiifPrintConfig).to receive(:where).with(split_from_pdf_id: fileset.id).and_return([child_work]) + end + + it 'destroys the child works' do + expect(child_work).to receive(:destroy) + subject + end + end + + context 'with child works by title' do + before do + allow(WorkWithIiifPrintConfig).to receive(:where).with(split_from_pdf_id: fileset.id).and_return([]) + allow(WorkWithIiifPrintConfig).to receive(:where).and_return([child_work]) + end + + it 'destroys the child works' do + expect(child_work).to receive(:destroy) + subject + end + end + + context 'when fileset is not a PDF mimetype' do + before do + allow(fileset).to receive(:mime_type).and_return('not_pdf') + end + + it 'returns with no changes' do + expect(IiifPrint::PendingRelationship).not_to receive(:where) + end + end + + context 'when IiifPrint::PendingRelationship records exist' do + before do + pending_rel1.save + pending_rel2.save + end + + it 'deletes only records associated with the specific fileset PDF file' do + expect{ subject }.to change(IiifPrint::PendingRelationship,:count).by(-1) + end + end + end +end From 65df06b202becf75d435f5274a937aa00d13eb2b Mon Sep 17 00:00:00 2001 From: LaRita Robinson Date: Tue, 14 Nov 2023 16:07:31 -0500 Subject: [PATCH 14/14] Rubocop fixes --- .../actors/cleanup_file_sets_actor_decorator.rb | 6 +++--- .../jobs/create_relationships_job_spec.rb | 12 ++++++++---- .../destroy_pdf_child_works_service_spec.rb | 16 ++++++++++------ 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb b/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb index b000a35a..329ed680 100644 --- a/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb +++ b/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb @@ -12,9 +12,9 @@ def destroy(env) file_sets.each do |file_set| # we destroy the children before the file_set, because we need the parent relationship IiifPrint::SplitPdfs::DestroyPdfChildWorksService.conditionally_destroy_spawned_children_of( - file_set: file_set, - work: env.curation_concern - ) + file_set: file_set, + work: env.curation_concern + ) end # and now back to your regularly scheduled programming super diff --git a/spec/iiif_print/jobs/create_relationships_job_spec.rb b/spec/iiif_print/jobs/create_relationships_job_spec.rb index ee54370f..4b0bade0 100644 --- a/spec/iiif_print/jobs/create_relationships_job_spec.rb +++ b/spec/iiif_print/jobs/create_relationships_job_spec.rb @@ -13,22 +13,26 @@ module IiifPrint::Jobs let(:parent_record) { WorkWithIiifPrintConfig.new(title: ['required title']) } let(:child_record1) { WorkWithIiifPrintConfig.new(title: ["Child of #{parent_record.id} page 01"]) } let(:child_record2) { WorkWithIiifPrintConfig.new(title: ["Child of #{parent_record.id} page 02"]) } - let(:pending_rel1) { IiifPrint::PendingRelationship.new( + let(:pending_rel1) do + IiifPrint::PendingRelationship.new( parent_id: parent_record.id, child_title: "Child of #{parent_record.id} page 01", child_order: "Child of #{parent_record.id} page 01", parent_model: parent_model, child_model: child_model, file_id: file.id - ) } - let(:pending_rel2) { IiifPrint::PendingRelationship.new( + ) + end + let(:pending_rel2) do + IiifPrint::PendingRelationship.new( parent_id: parent_record.id, child_title: "Child of #{parent_record.id} page 02", child_order: "Child of #{parent_record.id} page 02", parent_model: parent_model, child_model: child_model, file_id: file.id - ) } + ) + end describe '#perform' do before do diff --git a/spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb b/spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb index 22daa395..4a9d65ad 100644 --- a/spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb +++ b/spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb @@ -7,23 +7,27 @@ let(:work) { WorkWithIiifPrintConfig.new(title: ['required title'], id: '123') } let(:fileset) { FileSet.new.tap { |fs| fs.save!(validate: false) } } - let(:child_work) { WorkWithIiifPrintConfig.new(title: ["Child of #{work.id} file.pdf page 01"], id: '456', is_child: true ) } - let(:pending_rel1) { IiifPrint::PendingRelationship.new( + let(:child_work) { WorkWithIiifPrintConfig.new(title: ["Child of #{work.id} file.pdf page 01"], id: '456', is_child: true) } + let(:pending_rel1) do + IiifPrint::PendingRelationship.new( parent_id: work.id, child_title: "Child of #{work.id} file.pdf page 01", child_order: "Child of #{work.id} file.pdf page 01", parent_model: WorkWithIiifPrintConfig, child_model: WorkWithIiifPrintConfig, file_id: fileset.id - ) } - let(:pending_rel2) { IiifPrint::PendingRelationship.new( + ) + end + let(:pending_rel2) do + IiifPrint::PendingRelationship.new( parent_id: work.id, child_title: "Child of #{work.id} another.pdf page 01", child_order: "Child of #{work.id} another.pdf page 01", parent_model: WorkWithIiifPrintConfig, child_model: WorkWithIiifPrintConfig, file_id: 'another' - ) } + ) + end # let(:uploaded_pdf_file) { create(:uploaded_pdf_file) } # let(:uploaded_file_ids) { [uploaded_pdf_file.id] } @@ -81,7 +85,7 @@ end it 'deletes only records associated with the specific fileset PDF file' do - expect{ subject }.to change(IiifPrint::PendingRelationship,:count).by(-1) + expect { subject }.to change(IiifPrint::PendingRelationship, :count).by(-1) end end end