Skip to content

Instantly share code, notes, and snippets.

@nnslvp
Created February 9, 2021 12:32
Show Gist options
  • Save nnslvp/bfa29787d8058e244579b1621fce48ad to your computer and use it in GitHub Desktop.
Save nnslvp/bfa29787d8058e244579b1621fce48ad to your computer and use it in GitHub Desktop.
Concat large text files tool
# frozen_string_literal: true
require 'optparse'
require 'sys/filesystem' # gem install sys-filesystem
# Parses the command line of the concat tool.
#
# Recognised switches are -o/--output (required), -k/--keep and -h/--help;
# every remaining non-option argument is treated as the path of a part file.
# When the arguments are unusable, an error is written to stderr and the
# process exits with status 1.
class ConcatOptionsParser
  # parts  - Array<String> of part file paths left over after option parsing
  # output - String path given with -o/--output
  # keep   - true when -k/--keep was given, false otherwise
  attr_reader :parts, :output, :keep

  # @param argv [Array<String>] arguments to parse; defaults to the process
  #   ARGV. The array is consumed in place (OptionParser#parse!), so on
  #   return it holds only the non-option arguments.
  # @raise [SystemExit] status 1 on an unknown switch, a missing -o value or
  #   an empty parts list; status 0 after printing help
  def initialize(argv = ARGV)
    @keep = false
    @output = nil
    @parts = build_parser.parse!(argv)
    abort_with('-o --output is required') unless @output
    # An empty array is truthy in Ruby, so emptiness has to be tested explicitly.
    abort_with('<parts_paths> is required') if @parts.empty?
  rescue OptionParser::ParseError => e
    abort_with(e.message)
  end

  private

  # Builds the OptionParser whose handlers fill in @keep and @output.
  def build_parser
    OptionParser.new do |parser|
      parser.banner = "\nUsage: ruby #{script_name} <parts_paths> -o <output_path> [-k]"
      parser.separator ''
      parser.separator 'Options:'
      parser.on('-k', '--keep', TrueClass, 'The keep parts files flag.') do |keep_flag|
        @keep = keep_flag
      end
      parser.on('-o', '--output STR', String, 'The output path.') do |output|
        @output = output
      end
      parser.on_tail('-h', '--help', '--usage', 'Show this usage message and quit.') do
        puts parser.help
        puts ''
        exit
      end
    end
  end

  # Name of the running script, so the help hints match however the file is named.
  def script_name
    File.basename($PROGRAM_NAME)
  end

  # Reports a usage error on stderr and terminates with a failing exit status.
  def abort_with(message)
    warn "\nError: #{message}\nFor help: ruby #{script_name} -h\n\n"
    exit 1
  end
end
# Concatenates a list of part files into one newly created output file.
#
# Parts are appended in order of path length, ties broken alphabetically, so
# "part2" is written before "part10". The copy is done in-process, without a
# shell, so paths containing spaces or shell metacharacters are handled and a
# failed copy raises instead of being silently ignored.
class Concat
  # @param parts_paths [Array<String>] paths of the files to merge
  # @param result_path [String] path of the output file; must not exist yet
  # @param keep [Boolean, nil] when truthy the parts are left in place,
  #   otherwise each part is deleted right after it has been appended
  def initialize(parts_paths, result_path, keep)
    @parts_paths = parts_paths
    @result_path = result_path
    @keep = keep
  end

  # Creates the output file and appends every part to it, printing progress
  # to stdout.
  #
  # @raise [RuntimeError] if the output file already exists or a part does
  #   not fit into the free space reported for the output's filesystem
  # @raise [SystemCallError] if a part cannot be read, written or deleted
  def call
    raise 'Output file already exist' if File.exist?(@result_path)

    # Binary mode: bytes are copied verbatim, with no newline/encoding translation.
    File.open(@result_path, 'wb') do |result_file|
      ordered_parts.each do |part_path|
        print part_path
        check_free_space!(@result_path, part_path)
        IO.copy_stream(part_path, result_file)
        # Only reached when the copy succeeded, so a part is never lost on failure.
        File.delete(part_path) unless @keep
        puts ' Ready.'
      end
    end
    puts 'Output: '
    puts @result_path
  end

  # Raises unless the filesystem holding +folder_path+ reports at least as
  # many available bytes as the size of +part_path+.
  #
  # @param folder_path [String] any existing path on the target filesystem
  # @param part_path [String] path of the part about to be appended
  # @raise [RuntimeError] when the part is larger than the available space
  def check_free_space!(folder_path, part_path)
    info = Sys::Filesystem.stat(folder_path)
    # NOTE(review): assumes blocks_available is counted in block_size units;
    # confirm against the sys-filesystem docs (fragment_size may be the unit).
    available = info.block_size * info.blocks_available
    part_size = File.size(part_path)
    raise "Not enough free space for concat part: #{part_path}" if part_size > available
  end

  private

  # Parts ordered by path length, then alphabetically. A single composite key
  # is used because sort_by is not guaranteed to be stable, so chaining
  # sort and sort_by would not reliably keep the alphabetical tie-break.
  def ordered_parts
    @parts_paths.sort_by { |path| [path.length, path] }
  end
end
# Script entry point: read the command line, then merge the listed parts.
cli = ConcatOptionsParser.new
Concat.new(cli.parts, cli.output, cli.keep).call
@nnslvp
Copy link
Author

nnslvp commented Feb 9, 2021

Example usage: ruby concat.rb parts_folder/* -o merged_data.json

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment