seba-- · December 21, 2015 10:49
diff --git a/.gitconfig b/.gitconfig
 [diff "word"]
  binary = true
  textconv = docx-to-txt.rb -t
diff --git a/docx-to-txt.rb b/docx-to-txt.rb
 #! /usr/bin/env ruby
 # Simplistic DOCX to plain text converter, loosely based on the
 # Simplistic OpenDocument Text (.odt) to plain text converter.
 #  Author: Jason Rogers <https://github.com/jacaetevha>
 # 
 # Changed 2013-08-21 by Sebastian Erdweg <https://github.com/seba-->
 #   Put a newline after any closing paragraph to retain the
 #   paragraph structure of the docx document.
 #
 # Assumes that the unzip and tidy commands are available on your system
 # (tidy is only invoked when -t/--text-only is not given).

 require 'optparse'
 require 'shellwords'

 # Command-line settings; :text_only is switched on by -t/--text-only.
 options = { text_only: false }

 optparse = OptionParser.new do |opts|
   opts.banner = "Usage: #{File.basename __FILE__} [options] file"

   opts.on('-t', '--text-only', 'Output less information') do
     options[:text_only] = true
   end

   opts.on('-h', '--help', 'Display this screen') do
     puts opts
     exit
   end
 end

 # Parse the switches; the document path is then expected in ARGV[0].
 optparse.parse!

 if ARGV[0].nil?
   puts "No filename given!\n"
   puts "Usage: #{File.basename __FILE__} filename\n"
   exit 1
 end

 unless File.exist?(ARGV[0])
   puts "File does not exist!\n"
   puts "Usage: #{File.basename __FILE__} filename\n"
   exit 1
 end

 # Extract the main document part of the archive to stdout. The path is
 # shell-escaped so that names containing quotes, spaces or other shell
 # metacharacters reach unzip as one literal argument instead of breaking
 # (or injecting into) the command line.
 command  = "unzip -qq -p #{Shellwords.escape(ARGV[0])} word/document.xml"
 # Without -t the raw XML is piped through tidy before being printed.
 command += '  | tidy -utf8 -xml -w 255 -i -c -q -asxml' unless options[:text_only]
 content = `#{command}`

 if options[:text_only]
   content.gsub!(%r{</w:p>}, "\n")   # replace end-of-paragraph tag </w:p> by newline
   content.gsub!(/<[^>]+>/, '')      # remove all XML tags
   content.gsub!(/\n{2,}/, "\n\n")   # collapse runs of newlines into a single blank line
   content.gsub!(/\A\n+/, '')        # remove leading blank lines
 end
 puts content
	#! /usr/bin/env ruby
	# Simplistic DOCX to plain text converter, loosely based on the
	# Simplistic OpenDocument Text (.odt) to plain text converter.
	# Author: Jason Rogers <https://github.com/jacaetevha>
	#
	# Changed 2013-08-21 by Sebastian Erdweg <https://github.com/seba-->
	# Put a newline after any closing paragraph to retain the
	# paragraph structure of the docx document.
	#
	# Assumes that the unzip and tidy commands are available on your system
	# (tidy is only invoked when -t/--text-only is not given).

	require 'optparse'
	require 'shellwords'

	# Command-line settings; :text_only is switched on by -t/--text-only.
	options = { text_only: false }

	optparse = OptionParser.new do |opts|
	  opts.banner = "Usage: #{File.basename __FILE__} [options] file"

	  opts.on('-t', '--text-only', 'Output less information') do
	    options[:text_only] = true
	  end

	  opts.on('-h', '--help', 'Display this screen') do
	    puts opts
	    exit
	  end
	end

	# Parse the switches; the document path is then expected in ARGV[0].
	optparse.parse!

	if ARGV[0].nil?
	  puts "No filename given!\n"
	  puts "Usage: #{File.basename __FILE__} filename\n"
	  exit 1
	end

	unless File.exist?(ARGV[0])
	  puts "File does not exist!\n"
	  puts "Usage: #{File.basename __FILE__} filename\n"
	  exit 1
	end

	# Extract the main document part of the archive to stdout. The path is
	# shell-escaped so that names containing quotes, spaces or other shell
	# metacharacters reach unzip as one literal argument instead of breaking
	# (or injecting into) the command line.
	command = "unzip -qq -p #{Shellwords.escape(ARGV[0])} word/document.xml"
	# Without -t the raw XML is piped through tidy before being printed.
	command += ' | tidy -utf8 -xml -w 255 -i -c -q -asxml' unless options[:text_only]
	content = `#{command}`

	if options[:text_only]
	  content.gsub!(%r{</w:p>}, "\n") # replace end-of-paragraph tag </w:p> by newline
	  content.gsub!(/<[^>]+>/, '')    # remove all XML tags
	  content.gsub!(/\n{2,}/, "\n\n") # collapse runs of newlines into a single blank line
	  content.gsub!(/\A\n+/, '')      # remove leading blank lines
	end
	puts content