Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract image or pdf on windows platform bugfix #90

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion lib/docsplit.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ module Docsplit

VERSION = '0.7.2' # Keep in sync with gemspec.

# The 'host_os' entry of Ruby's build configuration.
# `defined?` must receive the bare constant: a quoted name is a string literal,
# which is always "defined", so the Config fallback could never be selected.
HOST_OS = (defined?(RbConfig) ? RbConfig : Config)::CONFIG['host_os']

# True when HOST_OS matches one of the Windows-style host names below.
IS_WIN = !!HOST_OS.match(/mswin|msys|mingw|cygwin|bccwin|wince|emc/i)

# Quotes a string so it can be passed as a single shell argument: wrapped in
# double quotes when IS_WIN is true, Shellwords-escaped otherwise.
# Assigned exactly once; a second assignment would trigger Ruby's
# "already initialized constant" warning.
ESCAPE = IS_WIN ? lambda {|x| "\"#{x}\"" } : lambda {|x| Shellwords.shellescape(x) }

ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
ESCAPED_ROOT = ESCAPE[ROOT]
Expand Down
20 changes: 16 additions & 4 deletions lib/docsplit/image_extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,25 @@ def convert(pdf, size, format, previous=nil)
FileUtils.mkdir_p(directory) unless File.exists?(directory)
common = "#{MEMORY_ARGS} -density #{@density} #{resize_arg(size)} #{quality_arg(format)}"
if previous
FileUtils.cp(Dir[directory_for(previous) + '/*'], directory)
result = `MAGICK_TMPDIR=#{tempdir} OMP_NUM_THREADS=2 gm mogrify #{common} -unsharp 0x0.5+0.75 \"#{directory}/*.#{format}\" 2>&1`.chomp
# Only copy image files; skip other files such as Thumbs.db on Windows.
imageFiles = File.join(directory_for(previous), '*.' + format)
FileUtils.cp(Dir.glob(imageFiles), directory)
if IS_WIN
cmd = "set MAGICK_TMPDIR=#{tempdir} & set OMP_NUM_THREADS=2 & gm mogrify #{common} -unsharp 0x0.5+0.75 \"#{directory}/*.#{format}\" 2>&1".chomp
else
cmd = "MAGICK_TMPDIR=#{tempdir} OMP_NUM_THREADS=2 gm mogrify #{common} -unsharp 0x0.5+0.75 \"#{directory}/*.#{format}\" 2>&1".chomp
end
result = `#{cmd}`.chomp
raise ExtractionFailed, result if $? != 0
else
page_list(pages).each do |page|
out_file = ESCAPE[File.join(directory, "#{basename}_#{page}.#{format}")]
cmd = "MAGICK_TMPDIR=#{tempdir} OMP_NUM_THREADS=2 gm convert +adjoin -define pdf:use-cropbox=true #{common} #{escaped_pdf}[#{page - 1}] #{out_file} 2>&1".chomp
if IS_WIN
out_file = File.join(directory, "#{basename}_#{page}.#{format}")
cmd = "set MAGICK_TMPDIR=#{tempdir} & set OMP_NUM_THREADS=2 & gm convert +adjoin -define pdf:use-cropbox=true #{common} #{escaped_pdf}[#{page - 1}] \"#{out_file}\" 2>&1".chomp
else
out_file = ESCAPE[File.join(directory, "#{basename}_#{page}.#{format}")]
cmd = "MAGICK_TMPDIR=#{tempdir} OMP_NUM_THREADS=2 gm convert +adjoin -define pdf:use-cropbox=true #{common} #{escaped_pdf}[#{page - 1}] #{out_file} 2>&1".chomp
end
result = `#{cmd}`.chomp
raise ExtractionFailed, result if $? != 0
end
Expand Down
28 changes: 24 additions & 4 deletions lib/docsplit/pdf_extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class PdfExtractor
# Provide a set of helper functions to determine the OS.
# HOST_OS holds the 'host_os' entry of Ruby's build configuration. `defined?`
# takes the bare constant here: a quoted name is a string literal and is
# always "defined", which would make the Config fallback unreachable.
HOST_OS = (defined?(RbConfig) ? RbConfig : Config)::CONFIG['host_os']
# Returns the IS_WIN constant (defined outside this method).
# The previous inline HOST_OS regex check was evaluated and then discarded,
# so it has been dropped; IS_WIN is the single source of truth.
def windows?
  IS_WIN
end
def osx?
!!HOST_OS.match(/darwin/i)
Expand All @@ -22,7 +22,13 @@ def version_string
@@help ||= `#{office_executable} -h 2>&1`.split("\n").first
end
# Returns true when the configured office suite looks like LibreOffice.
#
# On Windows the version banner cannot be obtained through version_string
# (per the original author's note), so the executable path is matched
# case-insensitively instead and version_string is never invoked there.
# Elsewhere the first line of the version banner must start with "LibreOffice".
def libre_office?
  if windows?
    !!office_executable.match(/libreOffice/i)
  else
    !!version_string.match(/^LibreOffice/)
  end
end
def open_office?
!!version_string.match(/^OpenOffice.org/)
Expand Down Expand Up @@ -116,7 +122,14 @@ def extract(docs, opts)
ENV['SYSUSERCONFIG']="file://#{File.expand_path(escaped_out)}"

options = "--headless --invisible --norestore --nolockcheck --convert-to pdf --outdir #{escaped_out} #{escaped_doc}"
cmd = "#{office_executable} #{options} 2>&1"

# Quote the path on Windows to avoid wrong-path issues.
if windows?
cmd = "\"#{office_executable}\" #{options} 2>&1"
else
cmd = "#{office_executable} #{options} 2>&1"
end

result = `#{cmd}`.chomp
raise ExtractionFailed, result if $? != 0
true
Expand All @@ -141,7 +154,14 @@ def run_jod(command, pdfs, opts, return_output=false)

pdfs = [pdfs].flatten.map{|pdf| "\"#{pdf}\""}.join(' ')
office = osx? ? "-Doffice.home=#{office_path}" : office_path
cmd = "java #{HEADLESS} #{LOGGING} #{office} -cp #{CLASSPATH} #{command} #{pdfs} 2>&1"

# Quote the path on Windows to avoid wrong-path issues.
if windows?
cmd = "java #{HEADLESS} #{LOGGING} \"#{office}\" -cp #{CLASSPATH} #{command} #{pdfs} 2>&1"
else
cmd = "java #{HEADLESS} #{LOGGING} #{office} -cp #{CLASSPATH} #{command} #{pdfs} 2>&1"
end

result = `#{cmd}`.chomp
raise ExtractionFailed, result if $? != 0
return return_output ? (result.empty? ? nil : result) : true
Expand Down