Skip to content

Instantly share code, notes, and snippets.

@ejlangev
Last active August 12, 2021 17:35
Show Gist options
  • Save ejlangev/59436f6a087c9e2768294a7709c4f074 to your computer and use it in GitHub Desktop.
Save ejlangev/59436f6a087c9e2768294a7709c4f074 to your computer and use it in GitHub Desktop.
require 'pp'
require 'ostruct'
# Parse the query export into one record per loan file.
#
# After braces and double quotes are stripped, each line is read as
# `<loan_file_id>,<activity>,<activity>,...` (presumably the second column is
# a quoted array from the query export -- confirm against the query).
#
# Each record exposes:
#   loan_file_id          - text before the first comma
#   all_activities        - every activity name on the line, duplicates kept
#   all_unique_activities - the same names with duplicates removed
#
# File.foreach closes the file once the last line has been read; a bare
# File.open(...).map would leave the handle open for the life of the script.
data = File.foreach('Query Results.csv').filter_map do |line|
  loan_file_id, activity_list = line.tr('{}"', '').strip.split(',', 2)

  # A blank line produces no fields at all; skip it instead of crashing.
  next if loan_file_id.nil?

  # A line with an id but no comma leaves activity_list nil; treat that as
  # "no activities" rather than raising NoMethodError.
  all_activities = activity_list.to_s.split(',')

  OpenStruct.new(
    loan_file_id: loan_file_id,
    all_activities: all_activities,
    all_unique_activities: all_activities.uniq
  )
end
# Tally, per activity name, the number of loans whose de-duplicated activity
# list contains it, and collect the names whose tally reaches the total
# number of loans (i.e. activities seen on every loan).
activity_frequency_map = {}
common_subset = []
loan_total = data.size

data.flat_map(&:all_unique_activities).each do |activity|
  seen_on = activity_frequency_map.fetch(activity, 0) + 1
  activity_frequency_map[activity] = seen_on
  # Record the name at the moment its count hits the number of loans.
  common_subset << activity if seen_on == loan_total
end

puts "Common Subset of Activities: #{common_subset.size}"
# Write one "<activity name>,<loan count>" row per entry of
# activity_frequency_map to activity_frequencies.csv.
#
# Mode 'w' already truncates an existing file to zero length on open, so no
# separate File.truncate call is needed.
File.open('activity_frequencies.csv', 'w') do |f|
  activity_frequency_map.each_pair do |name, value|
    f.puts("#{name},#{value}")
  end
end
# Write one row per loan to per_loan_data.csv with the columns:
#   loan_file_id, total activity count, unique activity count,
#   count of borrower-facing ad hoc activities, count of all ad hoc activities.
#
# Mode 'w' already truncates an existing file to zero length on open, so no
# separate File.truncate call is needed.

# Activity names counted as borrower-facing ad hoc work. Built once here
# instead of being re-allocated for every activity inside the count block.
borrower_facing_ad_hoc_names = %w[AnswerAdHocQuestions AdHocDocumentCollection].freeze

File.open('per_loan_data.csv', 'w') do |f|
  data.each do |d|
    borrower_facing_ad_hoc = d.all_activities.count { |n| borrower_facing_ad_hoc_names.include?(n) }
    # Any activity whose name contains "AdHoc" counts toward the overall ad hoc total.
    all_ad_hoc = d.all_activities.count { |n| n.include?('AdHoc') }
    f.puts("#{d.loan_file_id},#{d.all_activities.size},#{d.all_unique_activities.size},#{borrower_facing_ad_hoc},#{all_ad_hoc}")
  end
end
# For each rarity threshold, print:
#   1. how many activities occur on at most that fraction of loans, and what
#      share of all distinct activities that is;
#   2. how many loans include at least one such activity, and what share of
#      all loans that is.
#
# NOTE(review): the comparison is inclusive (<=) while the printed labels say
# "<" -- confirm which one is intended.
loan_total = data.size

[0.01, 0.03, 0.05, 0.08, 0.10].each do |percent|
  rare_activities = activity_frequency_map.select { |_name, loans| loans.to_f / loan_total <= percent }.keys
  loans_with_rare_activities = data.reject { |d| (d.all_unique_activities & rare_activities).empty? }

  threshold = percent * 100.0
  activity_share = (rare_activities.size * 100.0 / activity_frequency_map.size).round(2)
  loan_share = (loans_with_rare_activities.size * 100.0 / loan_total).round(2)

  puts "# of activities that happen on < #{threshold}% of loans: #{rare_activities.size} (#{activity_share}%)"
  puts "# of loans with an activity that happens < #{threshold}% of the time: #{loans_with_rare_activities.size} (#{loan_share}%)"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment