Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 Work through fallback of rodeo URLs #305

Merged
merged 2 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions app/services/iiif_print/derivative_rodeo_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ def named_derivatives_and_generators
# @param adapter_name [String] Added as a parameter to make testing just a bit easier. See
# {.preprocessed_location_adapter_name}
#
# @return [String]
# @return [String] when we have a possible candidate.
# @return [NilClass] when we could not derive a candidate.
# rubocop:disable Metrics/MethodLength
def self.derivative_rodeo_uri(file_set:, filename: nil, extension: nil, adapter_name: preprocessed_location_adapter_name)
# TODO: This is a hack that knows about the inner workings of Hydra::Works, but for
Expand All @@ -91,6 +92,7 @@ def self.derivative_rodeo_uri(file_set:, filename: nil, extension: nil, adapter_
filename ||= Hydra::Works::DetermineOriginalName.call(file_set.original_file)

dirname = derivative_rodeo_preprocessed_directory_for(file_set: file_set, filename: filename)
return nil unless dirname

# The aforementioned filename and the following basename and extension are here to allow
# us to take an original file and see if we've pre-processed the derivative file. In the
Expand Down Expand Up @@ -141,6 +143,8 @@ def self.get_ancestor(filename: nil, file_set:)
# @param file_set [FileSet]
# @param filename [String]
# @return [String] the dirname (without any "/" we hope)
# @return [NilClass] when we cannot infer a URI from the object.
# rubocop:disable Metrics/MethodLength
def self.derivative_rodeo_preprocessed_directory_for(file_set:, filename:)
ancestor, ancestor_type = get_ancestor(filename: filename, file_set: file_set)

Expand All @@ -152,19 +156,23 @@ def self.derivative_rodeo_preprocessed_directory_for(file_set:, filename:)
message = "#{self.class}.#{__method__} #{file_set.class} ID=#{file_set.id} and filename: #{filename.inspect}" \
"has #{ancestor_type} of #{ancestor.class} ID=#{ancestor.id}"
Rails.logger.info(message)
ancestor.public_send(parent_work_identifier_property_name) ||
raise("Expected #{ancestor.class} ID=#{ancestor.id} (#{ancestor_type} of #{file_set.class} ID=#{file_set.id}) " \
"to have a present #{parent_work_identifier_property_name.inspect}")
parent_work_identifier = ancestor.public_send(parent_work_identifier_property_name)
return parent_work_identifier if parent_work_identifier.present?
Rails.logger.warn("Expected #{ancestor.class} ID=#{ancestor.id} (#{ancestor_type} of #{file_set.class} ID=#{file_set.id}) " \
"to have a present #{parent_work_identifier_property_name.inspect}")
nil
else
# HACK: This makes critical assumptions about how we're creating the title for the file_set;
# but we don't have much to fall back on. Consider making this a configurable function. Or
# perhaps this entire method should be more configurable.
# TODO: Revisit this implementation.
file_set.title.first.split(".").first ||
raise("#{file_set.class} ID=#{file_set.id} has title #{file_set.title.first} from which we cannot infer information.")
candidate = file_set.title.first.split(".").first
return candidate if candidate.present?
nil
end
# rubocop:enable Style/GuardClause
end
# rubocop:enable Metrics/MethodLength

def initialize(file_set)
@file_set = file_set
Expand Down
2 changes: 1 addition & 1 deletion lib/iiif_print/jobs/child_works_from_pdf_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def split_pdf(original_pdf_path, user, child_model, pdf_file_set)
"pdf_file_set #{pdf_file_set.inspect}"
end

@split_from_pdf_id = pdf_file_set.nil? ? nil : pdf_file_set.id
@split_from_pdf_id = pdf_file_set&.id
prepare_import_data(original_pdf_path, image_files, user)

# submit the job to create all the child works for one PDF
Expand Down
15 changes: 12 additions & 3 deletions lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,20 @@ def initialize(filename, file_set:, output_tmp_dir: Dir.tmpdir)
#
# @see https://github.com/scientist-softserv/space_stone-serverless/blob/7f46dd5b218381739cd1c771183f95408a4e0752/awslambda/handler.rb#L58-L63
# rubocop:disable Metrics/MethodLength
# rubocop:disable Metrics/AbcSize
def preprocessed_location_template
return @preprocessed_location_template if defined?(@preprocessed_location_template)

derivative_rodeo_candidate = IiifPrint::DerivativeRodeoService.derivative_rodeo_uri(file_set: file_set, filename: filename)

@preprocessed_location_template =
if rodeo_conformant_uri_exists?(derivative_rodeo_candidate)
if derivative_rodeo_candidate.blank?
message = "#{self.class}##{__method__} could not establish derivative_rodeo_candidate for " \
"#{file_set.class} ID=#{file_set&.id} #to_param=#{file_set&.to_param} with filename #{filename.inspect}. " \
"Move along little buddy."
Rails.logger.debug(message)
nil
elsif rodeo_conformant_uri_exists?(derivative_rodeo_candidate)
Rails.logger.debug("#{self.class}##{__method__} found existing file at location #{derivative_rodeo_candidate}. High five partner!")
derivative_rodeo_candidate
elsif file_set.import_url
Expand All @@ -94,12 +101,14 @@ def preprocessed_location_template
handle_original_file_not_in_derivative_rodeo
else
message = "#{self.class}##{__method__} could not find an existing file at #{derivative_rodeo_candidate} " \
"nor a remote_url for #{file_set.class} ID=#{file_set.id}. Returning `nil' as we have no possible preprocess. " \
"nor a remote_url for #{file_set.class} ID=#{file_set.id} #to_param=#{file_set&.to_param}. " \
"Returning `nil' as we have no possible preprocess. " \
"Maybe the input_uri #{input_uri.inspect} will be adequate."
Rails.logger.warn(message)
nil
end
end
# rubocop:enable Metrics/AbcSize
# rubocop:enable Metrics/MethodLength

##
Expand Down Expand Up @@ -146,7 +155,7 @@ def split_files
rescue => e
message = "#{self.class}##{__method__} encountered `#{e.class}' “#{e}” for " \
"input_uri: #{input_uri.inspect}, " \
"output_location_template: #{output_location_template.inspect}, and" \
"output_location_template: #{output_location_template.inspect}, and " \
"preprocessed_location_template: #{preprocessed_location_template.inspect}."
exception = RuntimeError.new(message)
exception.set_backtrace(e.backtrace)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def self.conditionally_destroy_spawned_children_of(file_set:, work:)
return unless child_model
return unless file_set.class.pdf_mime_types.include?(file_set.mime_type)

IiifPrint::PendingRelationship.where(parent_id: work.id, file_id: file_set.id).each(&:destroy)
IiifPrint::PendingRelationship.where(parent_id: work.id, file_id: file_set.id).find_each(&:destroy)
destroy_spawned_children(model: child_model, file_set: file_set, work: work)
end

Expand Down
Loading