Skip to content

Instantly share code, notes, and snippets.

@camallen
Last active June 18, 2019 10:25
Show Gist options
  • Save camallen/8b82382df0363d424013b24813d8f803 to your computer and use it in GitHub Desktop.
Save camallen/8b82382df0363d424013b24813d8f803 to your computer and use it in GitHub Desktop.
Manual classification csv exports for a panoptes project
# Manual csv classifications dump
# Ensure config/database.yml is configured to use the read replica database, not the production db.
#
# Run with rails runner from the panoptes command line:
# rails r project_classifications_csv_dump_export.rb
require 'csv'
# Id of the project whose classifications are exported.
PROJECT_ID = 1
# Project record that the classification query is built from.
@resource = Project.find(PROJECT_ID)
# Builds the query for the classifications to export: the complete
# classifications of @resource, restricted to workflows whose
# activated_state is "active", with the user and workflow associations
# marked for eager loading.
#
# @return the object returned by the final `includes` call
def completed_resource_classifications
  finished = @resource.classifications.complete
  on_active_workflows = finished.joins(:workflow).where(workflows: { activated_state: "active" })
  on_active_workflows.includes(:user, :workflow)
end
# Primes @cache with the subject data for one batch of classifications.
#
# Fetches the (classification_id, subject_id) rows for the batch with a raw
# SQL query, hands those rows to the cache, then loads the referenced
# Subject records (unscoped) and hands them to the cache as well.
#
# @param classifications [Enumerable] records responding to #id
# @return [Array] the subject_id of every fetched row, in row order
def setup_subjects_cache(classifications)
  classification_ids = classifications.map(&:id)

  link_rows =
    if classification_ids.empty?
      # An empty id list would render as "IN ()", which is not valid SQL on
      # many databases, so skip the query and treat it as "no rows".
      []
    else
      id_list = classification_ids.join(",")
      sql = "SELECT classification_id, subject_id FROM classification_subjects where classification_id IN (#{id_list})"
      ActiveRecord::Base.connection.select_rows(sql)
    end

  @cache.reset_classification_subjects(link_rows)
  subject_ids = link_rows.map { |_, subject_id| subject_id }
  @cache.reset_subjects(Subject.unscoped.where(id: subject_ids).load)
  subject_ids
end
# Primes @cache with the retired SubjectWorkflowStatus records that match
# the given subjects and the workflows of the given classifications.
#
# @param classifications [Enumerable] records responding to #workflow_id
# @param subject_ids [Array] subject ids to look statuses up for
# @return whatever @cache.reset_subject_workflow_statuses returns
def setup_retirement_cache(classifications, subject_ids)
  filters = {
    subject_id: subject_ids,
    # Each workflow only needs to appear once in the lookup.
    workflow_id: classifications.map(&:workflow_id).uniq
  }
  retired_statuses = SubjectWorkflowStatus.retired.where(filters).load
  @cache.reset_subject_workflow_statuses(retired_statuses)
end
# Destination of the export; a relative path, so it resolves against the
# directory the script is run from.
csv_file_path = "tmp/classifications_#{PROJECT_ID}_export.csv"
# Keep an already-assigned cache, otherwise build a new one.
@cache ||= ClassificationDumpCache.new

CSV.open(csv_file_path, 'wb') do |out|
  row_formatter = Formatter::Csv::Classification.new(@cache)
  out << row_formatter.headers

  # Walk the classifications batch by batch, refreshing the subject and
  # retirement caches for each batch before its rows are written.
  completed_resource_classifications.find_in_batches do |group|
    setup_retirement_cache(group, setup_subjects_cache(group))
    group.each { |classification| out << row_formatter.to_array(classification) }
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment