Created
June 24, 2012 11:23
-
-
Save sck/2982867 to your computer and use it in GitHub Desktop.
Use ScanSnap's 'Scan to Searchable PDF' even with PDFs not created by ScanSnap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env ruby | |
require 'tmpdir' | |
# Tells whether +fn+ already looks like a ".searchable.pdf" output file.
#
# @param fn [String] path or file name to inspect
# @return [Integer, nil] match offset (truthy) when the name contains
#   ".searchable.pdf", otherwise nil
def fixed?(fn)
  fn =~ /\.searchable\.pdf/
end
# Maps a PDF path to its ".searchable.pdf" sibling in the same directory.
#
# A path that already names a searchable file is returned unchanged.
#
# @param fn [String] path to a PDF
# @return [String] path with the original extension replaced by
#   ".searchable.pdf"
def searchable_fn(fn)
  return fn if fixed?(fn)

  stem = File.basename(fn, File.extname(fn))
  # File.join avoids a doubled separator when the directory is "/".
  File.join(File.dirname(fn), "#{stem}.searchable.pdf")
end
# Tells whether a ".searchable.pdf" counterpart of +fn+ already exists on disk.
#
# @param fn [String] path to a PDF
# @return [Boolean] false when +fn+ is itself a searchable file; otherwise
#   whether the derived searchable path exists
def have_fix?(fn)
  return false if fixed?(fn)

  # File.exists? was deprecated and then removed in Ruby 3.2; File.exist? is
  # the supported spelling.
  File.exist?(searchable_fn(fn))
end
# Path of the PDF being processed; the methods in this script read and update it.
$fn = ARGV.shift
# Without an argument the path helpers would fail with a TypeError on nil.
abort "Usage: #{$0} FILE.pdf" if $fn.nil?
# Prefer an already-produced searchable copy over the original.
$fn = searchable_fn($fn) if have_fix?($fn)
# Checks, via `pdfinfo`, whether the Creator field of $fn lacks the
# "ABBYY FineReader" marker.
#
# @return [Boolean] true when no Creator line is reported or it does not
#   mention ABBYY FineReader
def needs_creator_fixed?
  # The argv form bypasses the shell, so file names containing quotes, `$` or
  # backticks are passed through verbatim.
  report = IO.popen(["pdfinfo", $fn], &:read)
  creator_line = report.split("\n").grep(/Creator/).first
  creator_line !~ /ABBYY FineReader/
end
# Rewrites the PDF's Creator metadata with `pdftk`, writing the result to the
# searchable path derived from $fn.
#
# On success $fn is switched to the new file. On failure $fn is left alone so
# the caller's follow-up check still sees a file that needs fixing.
#
# @return [void]
def fix_creator
  log "Fixing creator"
  Dir.mktmpdir do |dir|
    pdftk_desc = File.join(dir, "pdftk_desc.txt")
    File.open(pdftk_desc, "w") do |w|
      w.puts "InfoKey: Creator"
      w.puts "InfoValue: ABBYY FineReader for ScanSnap (Mac edition)"
    end
    nfn = searchable_fn($fn)
    # Argv form: no shell is involved, so no quoting of the paths is needed.
    $fn = nfn if system("pdftk", $fn, "update_info", pdftk_desc, "output", nfn)
  end
end
# Extracts text from $fn with `pdftotext` and reports whether any word
# characters came out.
#
# @return [Integer, nil] offset of the first word character (truthy), or nil
#   when the extracted text contains none
def does_pdf_file_contain_text?
  # The argv form bypasses the shell, so $fn needs no quoting.
  text = IO.popen(["pdftotext", $fn, "/dev/stdout"], &:read)
  text =~ /\w+/
end
# Prints +m+ to stdout, prefixed with a bracketed tag derived from $0.
#
# The tag is the trailing run of word characters of the script name, so a
# name like "foo.rb" yields "rb".
#
# @param m [String] message to print
# @return [nil]
def log(m)
  tag = $0.gsub(/.*?(\w+$)/, "\\1")
  puts "[#{tag}] #{m}"
end
# Opens $fn in the "Scan to Searchable PDF" application via macOS `open`.
#
# @return [Boolean, nil] result of Kernel#system for the `open` command
def make_searchable
  log "Invoking PDF OCR"
  # Argv form: the application name and the path are passed without a shell.
  system("open", "-a", "Scan to Searchable PDF", $fn)
end
# Main flow: only PDFs without extractable text are sent through OCR.
unless does_pdf_file_contain_text?
  log "Not yet searchable"
  # NOTE(review): presumably the OCR app only accepts PDFs whose Creator
  # names ABBYY FineReader, hence the metadata rewrite first - confirm.
  if needs_creator_fixed?
    fix_creator
    raise "Failed to fix creator" if needs_creator_fixed?
  end
  make_searchable
  $stdout.write "Hit enter when OCR is finished..."
  # The prompt has no trailing newline; flush so it shows before we block.
  $stdout.flush
  $stdin.gets
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment