Skip to content

Commit

Permalink
Merge pull request #23 from rogeriochaves/master
Browse files Browse the repository at this point in the history
Added server mode with tests
  • Loading branch information
Erol committed Dec 20, 2014
2 parents d6bad63 + 0d072bb commit fc5efae
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 12 deletions.
105 changes: 93 additions & 12 deletions lib/yomu.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,16 @@
require 'mime/types'
require 'json'

require 'socket'
require 'stringio'

class Yomu
# Directory two levels above this file.
GEMPATH = File.dirname(File.dirname(__FILE__))
# Path of the Tika application jar, under the jar/ directory of GEMPATH.
JARPATH = File.join(Yomu::GEMPATH, 'jar', 'tika-app-1.6.jar')
# Port Yomu.server listens on when the caller does not supply one.
DEFAULT_SERVER_PORT = 9293 # an arbitrary, but perfectly cromulent, port

# Port and pid of the Tika server started by Yomu.server. Both are nil until
# a server is started and are reset to nil by Yomu.kill_server!.
@@server_port = nil
@@server_pid = nil

# Read text or metadata from a data buffer.
#
Expand All @@ -15,6 +22,21 @@ class Yomu
# metadata = Yomu.read :metadata, data

# Extract +type+ (:text, :html, :metadata or :mimetype) from the document
# bytes in +data+.
#
# The raw Tika output comes from the running server when a server pid is
# recorded, and from a one-off java process otherwise. :text and :html are
# returned as the raw String, :metadata as the parsed JSON, and :mimetype as
# the first MIME::Types entry matching the reported Content-Type. Any other
# type yields nil.
def self.read(type, data)
  raw = if @@server_pid
          _server_read(type, data)
        else
          _client_read(type, data)
        end

  case type
  when :text, :html
    raw
  when :metadata
    JSON.parse(raw)
  when :mimetype
    content_type = JSON.parse(raw)['Content-Type']
    MIME::Types[content_type].first
  end
end

def self._client_read(type, data)
switch = case type
when :text
'-t'
Expand All @@ -25,23 +47,35 @@ def self.read(type, data)
when :mimetype
'-m -j'
end
result = IO.popen "#{java} -Djava.awt.headless=true -jar #{Yomu::JARPATH} #{switch}", 'r+' do |io|

IO.popen "#{java} -Djava.awt.headless=true -jar #{Yomu::JARPATH} #{switch}", 'r+' do |io|
io.write data
io.close_write
io.read
end
end

case type
when :text
result
when :html
result
when :metadata
JSON.parse(result)
when :mimetype
MIME::Types[JSON.parse(result)['Content-Type']].first

# Send a document to the Tika server on localhost and return its raw reply.
#
# The first argument (the extraction type) is ignored here; the request
# carries only the document bytes.
#
#   data  String holding the document bytes to send
#
# Returns the reply as a String, read until the server closes the
# connection. Connection errors raised by TCPSocket.new propagate to the
# caller. The socket is always closed, even when reading or writing fails.
def self._server_read(_, data)
  socket = TCPSocket.new('localhost', @@server_port)

  begin
    socket.write(data)

    # tell Tika that we're done sending data
    socket.shutdown(Socket::SHUT_WR)

    response = ''
    loop do
      chunk = socket.recv(65536)
      # End of stream is '' on older Rubies and nil on Ruby 3.3+, so the nil
      # test has to come before calling a String method on the chunk.
      break if chunk.nil? || chunk.empty?
      response << chunk
    end
    response
  ensure
    socket.close
  end
end

# Create a new instance of Yomu with a given document.
Expand Down Expand Up @@ -137,7 +171,6 @@ def creation_date
end
end


# Tells whether this instance has a @path instance variable.
#
# Returns the truthy String "instance-variable" once @path has been assigned
# and nil otherwise (not a strict true/false).
# NOTE(review): presumably @path is set when the instance is built from a
# file path - confirm against the constructor.
def path?
defined? @path
end
Expand Down Expand Up @@ -180,6 +213,54 @@ def data
@data
end

# Returns pid of Tika server, started as a new spawned process.
#
#   type         :html, :text, :metadata or :mimetype
#   custom_port  e.g. 9293 (DEFAULT_SERVER_PORT is used when omitted)
#
#   Yomu.server(:text, 9294)
#
# The port and pid are stored in @@server_port and @@server_pid. An
# unrecognised type starts the server without an output switch.
def self.server(type, custom_port=nil)
  switches = case type
             when :text
               ['-t']
             when :html
               ['-h']
             when :metadata, :mimetype
               ['-m', '-j']
             else
               []
             end

  @@server_port = custom_port || DEFAULT_SERVER_PORT

  # Pass the command as an argument list rather than one interpolated string:
  # no shell is involved, so a JAVA_HOME or gem path containing spaces or
  # shell metacharacters is passed through intact, and the returned pid is
  # the java process itself.
  @@server_pid = Process.spawn(
    java, '-Djava.awt.headless=true', '-jar', Yomu::JARPATH,
    '--server', '--port', @@server_port.to_s, *switches
  )
  sleep(2) # Give the server 2 seconds to spin up.
  @@server_pid
end

# Kills server started by Yomu.server
#
# Always run this when you're done, or else Tika might run until you kill it
# manually. Put your extraction in a begin...ensure...end block and call this
# method in the ensure block, so the server is stopped even when extraction
# raises.
#
#   Yomu.server(:text)
#   reports = ["report1.docx", "report2.doc", "report3.pdf"]
#   begin
#     my_texts = reports.map { |report_path| Yomu.new(report_path).text }
#   ensure
#     Yomu.kill_server!
#   end
#
# Does nothing when no server pid is recorded. Always returns nil, and always
# clears the recorded pid and port, even if the process has already exited.
def self.kill_server!
  return unless @@server_pid

  begin
    Process.kill('INT', @@server_pid)
    # Reap the child in the background so it does not linger as a zombie.
    Process.detach(@@server_pid)
  rescue Errno::ESRCH
    # The server process is already gone; there is nothing left to signal.
  ensure
    @@server_pid = nil
    @@server_port = nil
  end

  nil
end

# Command used to launch Java: "<JAVA_HOME>/bin/java" when the JAVA_HOME
# environment variable is set, otherwise plain "java".
def self.java
  java_home = ENV['JAVA_HOME']
  return 'java' unless java_home

  java_home + '/bin/java'
end
Expand Down
30 changes: 30 additions & 0 deletions spec/yomu_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -149,4 +149,34 @@
expect( yomu.metadata['Content-Type'] ).to eql ["application/vnd.apple.pages", "application/vnd.apple.pages"]
end
end

# Specs for server mode: Yomu.server starts a Tika server process and
# Yomu.kill_server! stops it.
context 'working as server mode' do
# Starting the server must record a pid and a port and accept TCP
# connections; after kill_server! the port must refuse connections.
specify '#starts and kills server' do
begin
Yomu.server(:text)
expect(Yomu.class_variable_get(:@@server_pid)).not_to be_nil
expect(Yomu.class_variable_get(:@@server_port)).not_to be_nil

# A successful connect shows that something is listening on the port.
s = TCPSocket.new('localhost', Yomu.class_variable_get(:@@server_port))
expect(s).to be_a TCPSocket
s.close
ensure
# Capture the port first: kill_server! resets @@server_port to nil.
port = Yomu.class_variable_get(:@@server_port)
Yomu.kill_server!
# Wait before probing the port, since kill_server! only sends the signal.
sleep 2
expect { TCPSocket.new('localhost', port) }.to raise_error Errno::ECONNREFUSED
end
end

# With a :text server running, each sample document must yield the expected
# sentence. The server is killed in ensure so it does not outlive the spec.
specify '#runs samples through server mode' do
begin
Yomu.server(:text)
expect(Yomu.new('spec/samples/sample.pages').text).to include 'The quick brown fox jumped over the lazy cat.'
expect(Yomu.new('spec/samples/sample filename with spaces.pages').text).to include 'The quick brown fox jumped over the lazy cat.'
expect(Yomu.new('spec/samples/sample.docx').text).to include 'The quick brown fox jumped over the lazy cat.'
ensure
Yomu.kill_server!
end
end
end
end

0 comments on commit fc5efae

Please sign in to comment.