Skip to content

Instantly share code, notes, and snippets.

@orangewolf
Created May 27, 2022 23:01
Show Gist options
  • Save orangewolf/2d105c5b281506c909112915526d5d67 to your computer and use it in GitHub Desktop.
Save orangewolf/2d105c5b281506c909112915526d5d67 to your computer and use it in GitHub Desktop.
Active Fedora ID based Reindex
namespace :reindex do
# Returns +offset+ as a string, left-padded with zeros to at least five
# characters (longer values are returned unpadded). Used to build the
# tmp/to_index_<offset>.log file names.
def formated_offset(offset)
  digits = offset.to_s
  digits.rjust(5, '0')
end
# Writes the 'id' value of every document in +docs+, one per line, to
# tmp/to_index_<offset>.log (offset zero-padded via formated_offset).
# The file is opened in "w" mode, so an existing file is overwritten.
def print_docs(docs, offset)
  path = "tmp/to_index_#{formated_offset(offset)}.log"
  File.open(path, "w") do |out|
    docs.each { |solr_doc| out.puts solr_doc['id'] }
  end
end
# Exports the id of every Solr document into paged files under tmp/.
#
# Queries Solr for "*:*" in pages of 500 rows and passes each page of
# documents, together with its page number, to print_docs. The first
# response's numFound is used as the total. After every page past the
# first, a progress line is printed to stdout.
def print_all_solr_ids
  page_size = 500
  offset = 0
  q = ActiveFedora::SolrService.get("*:*", rows: page_size, start: offset * page_size)
  total = q['response']['numFound']
  print_docs(q['response']['docs'], offset)

  # Request another page only while documents remain beyond the pages
  # already fetched. Testing `offset * page_size < total` instead would
  # always issue one extra query past the end and write an empty file.
  while (offset + 1) * page_size < total
    offset += 1
    q = ActiveFedora::SolrService.get("*:*", rows: page_size, start: offset * page_size)
    print_docs(q['response']['docs'], offset)
    puts "#{offset * page_size} of #{total}"
  end
end
# Rake task `export`: after the :environment prerequisite has run, calls
# print_all_solr_ids to write the Solr ids into the tmp/to_index_*.log files.
desc 'Create files file cache for all solr ids'
task(export: :environment) { print_all_solr_ids }
# Re-indexes every object whose id is listed in
# tmp/to_index_<offset>.log (offset zero-padded via formated_offset).
#
# Each non-blank line is treated as one id: it is echoed to stdout with
# its zero-based line number, loaded with ActiveFedora::Base.find, and
# then update_index is called on the result. Errors raised by find or
# update_index are not rescued here and will abort the run.
def index_from_file(offset)
  File.foreach("tmp/to_index_#{formated_offset(offset)}.log").with_index do |line, line_num|
    # File.foreach yields each line including its trailing newline; strip it
    # so the lookup receives the bare id.
    id = line.strip
    next if id.empty?

    puts "#{line_num}: #{id}"
    ActiveFedora::Base.find(id).update_index
  end
end
# Rake task `import[start_id,end_id]`: after the :environment prerequisite
# has run, calls index_from_file for every page number in start_id..end_id.
#
# start_id defaults to 0. end_id defaults to the highest page number found
# among the tmp/to_index_* files. Raises if end_id is omitted and no such
# files exist.
desc 'Index based on exported id files'
# The `=>` between the argument names and the prerequisites is required:
# without it Ruby parses `[...] [:environment]` as indexing the first array
# with a Symbol, which raises TypeError as soon as the file is loaded.
task :import, [:start_id, :end_id] => [:environment] do |_t, args|
  start_id = args[:start_id]&.to_i || 0
  end_id = args[:end_id]&.to_i
  # Take the numeric maximum of the page numbers embedded in the file names
  # rather than relying on lexicographic ordering of the paths.
  end_id ||= Dir.glob('tmp/to_index_*').map { |path| File.basename(path)[/\d+/].to_i }.max
  raise 'No tmp/to_index_* files found; run the export task first' if end_id.nil?

  start_id.upto(end_id) { |i| index_from_file(i) }
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment