Skip to content
This repository has been archived by the owner on May 3, 2023. It is now read-only.

Commit

Permalink
Extracting TIFF splitting logic from IIIF Print
Browse files Browse the repository at this point in the history
As I'm thinking through the logic, I'm realizing that we have a splitter
and a strategy; the splitter leverages the strategy. As implemented,
the splitter is the strategy and the strategy is the splitter.

Related to:

- scientist-softserv/iiif_print#194
- samvera/bulkrax#760
- scientist-softserv/utk-hyku#343
- https://github.com/scientist-softserv/adventist-dl/issues/330
  • Loading branch information
jeremyf committed Mar 27, 2023
1 parent 7d235a0 commit 156ec24
Show file tree
Hide file tree
Showing 10 changed files with 128 additions and 15 deletions.
7 changes: 4 additions & 3 deletions lib/space_stone/pdf_splitter/pdf_pages_summary.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,21 @@ module PdfSplitter
PdfPagesSummary = Struct.new(
:path, :page_count, :width,
:height, :pixels_per_inch, :color_description,
:channels, :bits, keyword_init: true
:channels, :bits_per_channel, keyword_init: true
) do
# @return [Array<String, Integer, Integer>]
def color
[color_description, channels, bits]
[color_description, channels, bits_per_channel]
end
alias_method :ppi, :pixels_per_inch
alias_method :bits, :bits_per_channel

# If the underlying extraction couldn't set the various properties, we likely have an
# invalid_pdf.
def valid?
return false if pdf_pages_summary.color_description.nil?
return false if pdf_pages_summary.channels.nil?
return false if pdf_pages_summary.bits.nil?
return false if pdf_pages_summary.bits_per_channel.nil?
return false if pdf_pages_summary.height.nil?
return false if pdf_pages_summary.page_count.to_i.zero?

Expand Down
6 changes: 3 additions & 3 deletions lib/space_stone/pdf_splitter/pdf_pages_summary/extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def call
width = 0
height = 0
channels = 0
bits = 0
bits_per_channel = 0
pixels_per_inch = 0
Open3.popen3(command) do |_stdin, stdout, _stderr, _wait_thr|
stdout.read.split("\n").each_with_index do |line, index|
Expand All @@ -70,7 +70,7 @@ def call
width = cells[COL_WIDTH].to_i if cells[COL_WIDTH].to_i > width
height = cells[COL_HEIGHT].to_i if cells[COL_HEIGHT].to_i > height
channels = cells[COL_CHANNELS].to_i if cells[COL_CHANNELS].to_i > channels
bits = cells[COL_BITS].to_i if cells[COL_BITS].to_i > bits
bits_per_channel = cells[COL_BITS].to_i if cells[COL_BITS].to_i > bits_per_channel

# In the case of poppler version < 0.25, we will have no more than 12 columns. As such,
# we need to do some alternative magic to calculate this.
Expand All @@ -95,7 +95,7 @@ def call
height: height,
color_description: color_description,
channels: channels,
bits: bits
bits_per_channel: bits_per_channel
)
end
# rubocop:enable Metrics/AbcSize
Expand Down
1 change: 1 addition & 0 deletions lib/space_stone/pdf_splitter/strategies.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ module Strategies

require 'space_stone/pdf_splitter/strategies/base'
require 'space_stone/pdf_splitter/strategies/jpg'
require 'space_stone/pdf_splitter/strategies/tiff'
13 changes: 7 additions & 6 deletions lib/space_stone/pdf_splitter/strategies/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ def invalid_pdf?
attr_reader :pdf_pages_summary, :tmpdir, :baseid, :pdfpath
private :pdf_pages_summary, :tmpdir, :baseid, :pdfpath

# @api private
#
# Instance-level reader for the device configured on the class.
#
# @return [Object] the truthy value returned by the class-level `gsdevice`
# @raise [NotImplementedError] when the class-level `gsdevice` is nil or false,
#   i.e. the subclass has not configured one
def gsdevice
  # Guard first: an unconfigured subclass is a programming error.
  raise NotImplementedError unless self.class.gsdevice

  self.class.gsdevice
end

private

# entries for each page
Expand Down Expand Up @@ -94,12 +101,6 @@ def gsconvert
end
# rubocop:enable Metrics/MethodLength

# Returns the device configured at the class level.
#
# @return [Object] the truthy value returned by the class-level `gsdevice`
# @raise [NotImplementedError] when the class-level `gsdevice` is nil or false
def gsdevice
  if self.class.gsdevice
    self.class.gsdevice
  else
    raise NotImplementedError
  end
end

PAGE_COUNT_REGEXP = %r{^Pages: +(\d+)$}.freeze

def pagecount
Expand Down
47 changes: 47 additions & 0 deletions lib/space_stone/pdf_splitter/strategies/tiff.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# frozen_string_literal: true

module SpaceStone
module PdfSplitter
module Strategies
# The purpose of this class is to split the PDF into constituent tiff files.
class Tiff < Strategies::Base
self.image_extension = 'tiff'
self.compression = 'lzw'

##
# @api private
#
# Picks the TIFF output device name from the PDF's summarized color
# information and memoizes the answer on the instance.
#
# - gray at 1 bit per channel: 'tiffg4' (CCITT Group 4 black and white); this
#   also switches this instance's compression to 'g4'.
# - gray at more than 1 bit per channel: 'tiffgray' (grayscale).
# - anything else: delegated to {#colordevice}.
#
# Takes no arguments; everything is read from `pdf_pages_summary`.
#
# @return [String] the device name
def gsdevice
  return @gsdevice if defined?(@gsdevice)

  color = pdf_pages_summary.color_description
  # A summary that could not be fully extracted may carry nil here; nil.to_i
  # is 0, which routes to the color fallback instead of raising NoMethodError
  # on the `>` comparison below.
  channels = pdf_pages_summary.channels.to_i
  bpc = pdf_pages_summary.bits_per_channel.to_i

  @gsdevice =
    if color == 'gray' && bpc == 1
      # CCITT Group 4 black and white.
      self.compression = 'g4'
      'tiffg4'
    elsif color == 'gray' && bpc > 1
      # Grayscale with more than one bit per channel.
      'tiffgray'
    else
      # Non-gray sources, and gray sources with an unknown (zero) bit depth.
      colordevice(channels, bpc)
    end
end

# Builds the color TIFF device name from the total bits per pixel
# (bits per channel multiplied by the channel count).
#
# Only 24 and 48 total bits are passed through. Every other total (for
# example a four-channel CMYK source, or unknown/zero values) falls back
# to 24, so the result is always 'tiff24nc' or 'tiff48nc'.
#
# @param channels [Integer, nil] number of color channels; nil counts as 0
# @param bpc [Integer, nil] bits per channel; nil counts as 0
# @return [String] 'tiff24nc' or 'tiff48nc'
def colordevice(channels, bpc)
  # to_i keeps a partially extracted summary (nil values) on the fallback path
  # rather than raising NoMethodError on the multiplication.
  bits = bpc.to_i * channels.to_i
  # Will be either an 8bpc or a 16bpc color TIFF, with any CMYK source
  # transformed to 8bpc RGB.
  bits = 24 unless [24, 48].include?(bits)
  "tiff#{bits}nc"
end
end
end
end
end
12 changes: 12 additions & 0 deletions spec/space_stone/pdf_splitter/pdf_pages_summary/extractor_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,18 @@
expect(subject.width).to eq(3)
end

it 'determines the bits_per_channel of the given PDF' do
expect(subject.bits_per_channel).to eq(8)
end

it 'determines the color_description of the given PDF' do
expect(subject.color_description).to eq('rgb')
end

it 'determines the channels of the given PDF' do
expect(subject.channels).to eq(3)
end

it 'determines the color of the given PDF' do
expect(subject.color).to eq(['rgb', 3, 8])
end
Expand Down
4 changes: 3 additions & 1 deletion spec/space_stone/pdf_splitter/pdf_pages_summary_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@

subject do
described_class.new(path: __FILE__, page_count: 1, width: 2, height: 3,
pixels_per_inch: 4, color_description: 'rgb', channels: 5, bits: 6)
pixels_per_inch: 4, color_description: 'rgb', channels: 5,
bits_per_channel: 6)
end

it { is_expected.to respond_to(:valid?) }
Expand All @@ -31,4 +32,5 @@
it { is_expected.to respond_to(:color_description) }
it { is_expected.to respond_to(:channels) }
it { is_expected.to respond_to(:bits) }
it { is_expected.to respond_to(:bits_per_channel) }
end
2 changes: 1 addition & 1 deletion spec/space_stone/pdf_splitter/strategies/base_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

describe '#gsdevice' do
it "expects that you will have set .gsdevice in the subclass" do
expect { subject.send(:gsdevice) }.to raise_error(NotImplementedError)
expect { subject.gsdevice }.to raise_error(NotImplementedError)
end
end
end
2 changes: 1 addition & 1 deletion spec/space_stone/pdf_splitter/strategies/jpg_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
end

describe '#gsdevice' do
subject { splitter.send(:gsdevice) }
subject { splitter.gsdevice }
it { is_expected.to eq('jpeg') }
end

Expand Down
49 changes: 49 additions & 0 deletions spec/space_stone/pdf_splitter/strategies/tiff_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# frozen_string_literal: true
require 'spec_helper'

# Specs for the TIFF splitting strategy: class-level configuration and the
# device name chosen for a variety of PDF color summaries.
RSpec.describe SpaceStone::PdfSplitter::Strategies::Tiff do
  let(:path) { __FILE__ }
  let(:pdf_pages_summary) { double(SpaceStone::PdfSplitter::PdfPagesSummary) }

  let(:splitter) { described_class.new(path, pdf_pages_summary: pdf_pages_summary) }

  describe '.compression' do
    subject { described_class.compression }
    it { is_expected.to eq('lzw') }
  end

  describe '.compression?' do
    subject { described_class.compression? }
    it { is_expected.to be_truthy }
  end

  describe '.image_extension' do
    subject { described_class.image_extension }
    it { is_expected.to eq('tiff') }
  end

  describe '#gsdevice' do
    # Baseline summary attributes; each scenario below overrides a subset.
    # Held in a local (captured by the blocks below) rather than a constant,
    # because a constant assigned inside a block is not scoped to that block
    # and would leak into the enclosing namespace.
    default_summary_attributes = {
      page_count: 10,
      color_description: 'rgb',
      bits_per_channel: 0,
      channels: 0
    }.freeze

    [
      [{ color_description: 'gray', bits_per_channel: 2 }, 'tiffgray'],
      [{ color_description: 'gray', bits_per_channel: 1 }, 'tiffg4'],
      [{ color_description: 'rgb', bits_per_channel: 1 }, 'tiff24nc'],
      [{ color_description: 'rgb', channels: 8, bits_per_channel: 6 }, 'tiff48nc'],
      [{ color_description: 'rgb', channels: 8, bits_per_channel: 5 }, 'tiff24nc'],
      [{ color_description: 'rgb', channels: 8, bits_per_channel: 3 }, 'tiff24nc']
    ].each do |attributes, expected_value|
      # Double quotes are required so the attributes are interpolated; with
      # single quotes every context would share the same literal description.
      context "with #{attributes.inspect}" do
        # Use a real summary here (overriding the outer double) so the strategy
        # reads actual attribute values.
        let(:pdf_pages_summary) do
          SpaceStone::PdfSplitter::PdfPagesSummary.new(**default_summary_attributes.merge(attributes))
        end

        it "is expected to be #{expected_value.inspect}" do
          expect(splitter.gsdevice).to eq(expected_value)
        end
      end
    end
  end
end

0 comments on commit 156ec24

Please sign in to comment.