diff --git a/index.html b/index.html index 001087b..3d556da 100755 --- a/index.html +++ b/index.html @@ -228,7 +228,9 @@
docsplit pdf documentation/*.html
diff --git a/lib/docsplit.rb b/lib/docsplit.rb index 5d0fa84..c53646a 100755 --- a/lib/docsplit.rb +++ b/lib/docsplit.rb @@ -62,6 +62,7 @@ def self.extract_images(pdfs, opts={}) # If the document is in an image format, use GraphicsMagick to extract the PDF. def self.extract_pdf(docs, opts={}) out = opts[:output] || '.' + timeout = opts[:timeout] || 3600 FileUtils.mkdir_p out unless File.exists?(out) [docs].flatten.each do |doc| ext = File.extname(doc) @@ -71,7 +72,7 @@ def self.extract_pdf(docs, opts={}) if GM_FORMATS.include?(`file -b --mime #{ESCAPE[doc]}`.strip.split(/[:;]\s+/)[0]) `gm convert #{escaped_doc} #{escaped_out}/#{escaped_basename}.pdf` else - options = "-jar #{ROOT}/vendor/jodconverter/jodconverter-core-3.0-beta-4.jar -r #{ROOT}/vendor/conf/document-formats.js" + options = "-jar #{ROOT}/vendor/jodconverter/jodconverter-core-3.0-beta-4.jar -t #{timeout} -r #{ROOT}/vendor/conf/document-formats.js" run "#{options} #{escaped_doc} #{escaped_out}/#{escaped_basename}.pdf", [], {} end end diff --git a/lib/docsplit/command_line.rb b/lib/docsplit/command_line.rb index 8d48500..22a9dad 100755 --- a/lib/docsplit/command_line.rb +++ b/lib/docsplit/command_line.rb @@ -94,6 +94,9 @@ def parse_options opts.on('--no-clean', 'disable cleaning of OCR\'d text') do |c| @options[:clean] = false end + opts.on('-t', '--timeout [SEC]', 'Timeout for PDF extraction from OpenOffice supported document format (default is 1 hour)') do |t| + @options[:timeout] = t + end opts.on('-r', '--rolling', 'generate images from each previous image') do |r| @options[:rolling] = true end @@ -116,4 +119,4 @@ def parse_options end -end \ No newline at end of file +end