diff --git a/Gemfile b/Gemfile index c80f64a24..c08e5b813 100644 --- a/Gemfile +++ b/Gemfile @@ -33,7 +33,6 @@ gem "ezid-client", "1.9.4" # v1.9.0 introduces response errors in our tests/stub gem "faker" gem "ffi", "~> 1.16.0" gem "filewatcher", "~> 1.0" -gem "flipper" gem "flutie" gem "font-awesome-rails" gem "google-cloud-pubsub" @@ -68,8 +67,8 @@ gem "mime-types" gem "mini_magick" gem "modernizr-rails" # Pin because capistrano raises an error at >= 7.2 -gem "net-ssh", "~> 7.1.0" gem "net-sftp" +gem "net-ssh", "~> 7.1.0" gem "normalize-rails" gem "oai" gem "omniauth", "1.9.2" diff --git a/Gemfile.lock b/Gemfile.lock index 9622df23b..e632e23d6 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -439,8 +439,6 @@ GEM ffi (1.16.3) filewatcher (1.1.1) optimist (~> 3.0) - flipper (1.3.2) - concurrent-ruby (< 2) flutie (2.2.0) font-awesome-rails (4.7.0.8) railties (>= 3.2, < 8.0) @@ -1291,7 +1289,6 @@ DEPENDENCIES faker ffi (~> 1.16.0) filewatcher (~> 1.0) - flipper flutie font-awesome-rails foreman diff --git a/app/jobs/create_ocr_request_job.rb b/app/jobs/create_ocr_request_job.rb index 653354eaf..0f3fe640d 100644 --- a/app/jobs/create_ocr_request_job.rb +++ b/app/jobs/create_ocr_request_job.rb @@ -10,15 +10,7 @@ def perform(file_path:) ocr_request = OcrRequest.new(filename: filename, state: "Enqueued") ocr_request.save! ocr_request.pdf.attach(io: File.open(file_path), filename: filename, content_type: "application/pdf") - out_path = File.join(ocr_out_dir, filename) - PdfOcrJob.perform_later(resource: ocr_request, out_path: out_path) + PdfOcrJob.perform_later(resource: ocr_request) File.delete(file_path) end - - def ocr_out_dir - out_dir = Figgy.config["ocr_out_path"] - FileUtils.mkdir_p(out_dir) unless File.directory?(out_dir) - - out_dir - end end diff --git a/app/jobs/pdf_ocr_job.rb b/app/jobs/pdf_ocr_job.rb index db15fd6bb..87e1b2808 100644 --- a/app/jobs/pdf_ocr_job.rb +++ b/app/jobs/pdf_ocr_job.rb @@ -2,12 +2,11 @@ class PdfOcrJob < ApplicationJob queue_as :high - attr_reader :blob, :out_path, :resource + attr_reader :blob, :resource - def perform(resource:, out_path:) + def perform(resource:) logger.info("PDF OCR job initiated for: #{resource.filename}") @resource = resource - @out_path = out_path @blob = resource.pdf # Required for ActiveStorage blob to tempfile method. update_state(state: "Processing") return unless pdf_attached? @@ -40,4 +39,16 @@ def update_state(state:, message: nil) resource.note = message if message resource.save end + + def out_path + File.join(ocr_out_dir, resource.filename) + end + + def ocr_out_dir + @ocr_out_dir ||= begin + path = Figgy.config["ocr_out_path"] + FileUtils.mkdir_p(path) unless File.directory?(path) + path + end + end end diff --git a/spec/jobs/pdf_ocr_job_spec.rb b/spec/jobs/pdf_ocr_job_spec.rb index 3c089b582..e816d291c 100644 --- a/spec/jobs/pdf_ocr_job_spec.rb +++ b/spec/jobs/pdf_ocr_job_spec.rb @@ -4,7 +4,7 @@ RSpec.describe PdfOcrJob do describe "#perform" do let(:out_dir) { Figgy.config["ocr_out_path"] } - let(:out_path) { File.join(out_dir, "ocr-sample.pdf") } + let(:out_path) { File.join(out_dir, "sample.pdf") } let(:resource) { FactoryBot.create(:ocr_request, file: fixture_path) } before do @@ -21,7 +21,7 @@ let(:fixture_path) { Rails.root.join("spec", "fixtures", "files", "sample.pdf") } it "creates on OCRed PDF in an output directory and deletes the attached PDF" do - expect { described_class.perform_now(resource: resource, out_path: out_path) } + expect { described_class.perform_now(resource: resource) } .to change { File.exist?(out_path) } .from(false).to(true) ocr_request = OcrRequest.all.first @@ -34,7 +34,7 @@ let(:fixture_path) { Rails.root.join("spec", "fixtures", "files", "bad.pdf") } it "saves error on the ocr request resource and copies original file to out path" do - described_class.perform_now(resource: resource, out_path: out_path) + described_class.perform_now(resource: resource) ocr_request = OcrRequest.all.first expect(ocr_request.state).to eq "Error" expect(ocr_request.note).to include "PDF OCR job failed" @@ -47,7 +47,7 @@ let(:resource) { FactoryBot.create(:ocr_request) } it "adds an error message to the ocr request resource" do - described_class.perform_now(resource: resource, out_path: out_path) + described_class.perform_now(resource: resource) ocr_request = OcrRequest.all.first expect(ocr_request.state).to eq "Error" expect(ocr_request.note).to include "Resource has no attached PDF"