Skip to content

Instantly share code, notes, and snippets.

@andynu
Created July 2, 2022 21:53
Show Gist options
  • Save andynu/88cf6e14908ae0eb256e640ec8d21d79 to your computer and use it in GitHub Desktop.
Save andynu/88cf6e14908ae0eb256e640ec8d21d79 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
require 'chronic'
require 'set'
# With no command-line arguments, fall back to the production log so that
# ARGF (which reads the files named in ARGV) has something to open.
ARGV.replace(['log/production.log']) if ARGV.empty?
# From facets gem
module Enumerable
  # Collapses runs of adjacent equal elements into a single element, like
  # String#squeeze does for characters.
  #
  #   [1, 2, 2, 3, 3, 2, 1].squeeze     #=> [1, 2, 3, 2, 1]
  #   [1, 2, 2, 3, 3, 2, 1].squeeze(3)  #=> [1, 2, 2, 3, 2, 1]
  #
  # @param limited_to [Array] when given, only these values are squeezed;
  #   every other element is copied through untouched.
  # @return [Array] a new array with the runs collapsed.
  def squeeze(*limited_to)
    result = []
    in_run = false # true while the previous element started/continued a run
    previous = nil
    each do |element|
      if !limited_to.empty? && !limited_to.include?(element)
        result << element
        # An element that is exempt from squeezing still breaks the run, so
        # [3, 1, 3].squeeze(3) keeps both 3s (they are not adjacent).
        in_run = false
      elsif !in_run || previous != element
        result << element
        in_run = true
        previous = element
      end
    end
    result
  end
end
# One logged request: every log line that shares the same set of tags.
#
# start_time / end_time - timestamps of the first and last line seen
# tags                  - array of tag strings taken from the log line
# messages              - the message text of each line, in order
Transaction = Struct.new(:start_time, :end_time, :tags, :messages) do
  # Extracts the request from the first message containing "Started", e.g.
  #   Started GET "/users/1?page=2" for 127.0.0.1 at ...
  #
  # @return [Array(String, String), Array(nil, nil)] "METHOD /path" and the
  #   query string (nil when the URL has none); both nil when the
  #   transaction has no "Started" line (e.g. the log begins mid-request).
  def split_url
    request_line = messages.grep(/Started/).first
    return [nil, nil] if request_line.nil?

    request = request_line.sub(/Started /, '').sub(/ for .*/, '')
    # Limit of 2 keeps any further "?" inside the query string.
    url, querystring = request.split('?', 2)
    return [nil, nil] if url.nil?

    # The logged URL is wrapped in double quotes: the opening quote lands in
    # the url part, the closing one at the end of whichever part comes last.
    [url.delete('"'), querystring&.chomp('"')]
  end

  # @return [String, nil] "METHOD /path" without the query string.
  def url
    split_url.first
  end

  # First tag. NOTE(review): presumably the user identifier; depends on the
  # order of the application's log tags.
  def user
    tags[0]
  end

  # Second tag.
  def session_id
    tags[1]
  end

  # Third tag.
  def request_id
    tags[2]
  end
end
# A tree node holding one value, a set of child nodes, a hit counter and the
# most recent time recorded for it.
class Node
  attr_accessor :latest_time
  attr_reader :value, :children, :count

  # Returns the node registered for +value+, creating and registering it on
  # first use. The registry is shared by every caller, so the same value
  # always yields the same object.
  def self.instance(value)
    @@nodes ||= {}
    @@nodes.fetch(value) { @@nodes[value] = Node.new(value) }
  end

  def initialize(value)
    @value = value
    @count = 0
    @children = Set.new
  end

  # Adds +node+ as a child (a Set, so adding twice is a no-op).
  # Returns the children set.
  def add(node)
    children.add(node)
  end

  # Bumps the hit counter and returns the new count.
  def increment
    @count = @count.succ
  end
end
# A group of transactions treated as one session; start time and user are
# taken from the first transaction in the list.
Sess = Struct.new(:transactions) do
  def start_time
    opening = transactions.first
    opening.start_time
  end

  def user
    opening = transactions.first
    opening.user
  end

  # URLs of the session's transactions, in order.
  #
  # scrub:   replace every run of digits with "#"
  # squeeze: collapse adjacent identical URLs into one entry
  def urls(squeeze: true, scrub: true)
    list = transactions.map { |trans| trans.url }
    if scrub
      list = list.map do |address|
        address.gsub(/\d+/, '#').squeeze('#')
      end
    end
    squeeze ? list.squeeze : list
  end
end
# Builds a tree of request paths from +transactions+.
#
# Each URL ("METHOD /path") has its digit runs replaced by "#", repeated
# slashes collapsed and a trailing slash removed. The path is then split on
# "/" and the HTTP method is appended to the last segment, so
# "GET /users/12/edit" becomes "" -> "users" -> "#" -> "edit-GET".
# The final node of each path gets its count incremented and its
# latest_time raised to the transaction's start_time when that is newer.
#
# Nodes are created per parent rather than looked up by segment text alone:
# sharing one node for every "#" or "edit" in the whole tree merges
# unrelated counters and creates cycles (e.g. "#" -> "posts" -> "#") that
# make a recursive walk of the tree never terminate.
#
# @param transactions [Enumerable] objects responding to #url and #start_time
# @return [Node, nil] the root node of the first path inserted, or nil when
#   nothing could be inserted
def url_trie(transactions)
  roots = {} # first path segment => root node, in insertion order
  transactions.each do |trans|
    url = trans.url
    next if url.nil? # no request line to work with

    meth, path = url.gsub(/\d+/, '#').squeeze('#').split(' ', 2)
    next if path.nil? # not of the form "METHOD /path"

    path = path.squeeze('/')
    # Keep the lone "/" of the root URL; stripping it would leave no path.
    path = path.chomp('/') if path.length > 1

    first_part, *remaining_parts = "#{path}-#{meth}".split('/')
    node = (roots[first_part] ||= Node.new(first_part))
    remaining_parts.each do |path_part|
      child = node.children.find { |candidate| candidate.value == path_part }
      if child.nil?
        child = Node.new(path_part)
        node.add(child)
      end
      node = child
    end

    node.increment
    started = trans.start_time
    if started && (node.latest_time.nil? || node.latest_time < started)
      node.latest_time = started
    end
  end
  roots.values.first
end
# Reads every line from ARGF and groups the tagged lines into transactions.
#
# A line is expected to look like
#   <time>| [tag] [tag] [tag] <message>
# i.e. a timestamp, a pipe, a space, one or more "[tag] " groups and the
# message. Lines that do not match are ignored. Lines carrying the same list
# of tags belong to the same transaction: its start_time is the time of the
# first such line and its end_time the time of the last one.
#
# @return [Array<Transaction>] transactions in order of first appearance
def load_transactions
  transactions = {}
  ARGF.each_line do |line|
    # Split on the first pipe only so a "|" inside the message is preserved.
    time, log = line.split('|', 2)
    match = log&.match(/\A ((?:\[[^\]]+\] )+)(.*)\Z/)
    next unless match

    # Take the text between each pair of brackets so that a tag containing
    # spaces stays a single tag.
    tags = match[1].scan(/\[([^\]]+)\]/).flatten
    timestamp = Chronic.parse(time) # parsed once per line; used for both ends
    transaction = (transactions[tags] ||= Transaction.new(timestamp, nil, tags, []))
    transaction.messages << match[2]
    transaction.end_time = timestamp
  end
  transactions.values
end
# Parse the log lines supplied through ARGF into transactions.
transactions = load_transactions
#pp transactions.map(&:url).tally
# Build the URL tree from the parsed transactions; `root` is printed at the
# end of the script.
root = url_trie(transactions)
# Disabled experiment: group the transactions by session id into Sess objects
# and dump a summary per session.
# sessions = {}
# transactions.each do |trans|
# sessions[trans.session_id] ||= Sess.new([])
# sessions[trans.session_id].transactions << trans
# end
#pp sessions.values.map{|s| [s.start_time, s.user, s.urls] }
# NOTE(review): Sess defines no url_trie method, so the next line would need
# one added before it could be re-enabled.
# pp sessions.values.map{|s| [s.url_trie] }
#puts requests
# Prints +node+ and all of its descendants to stdout, one node per line,
# indented with one tab per level. Nodes without children also show their
# hit count and the date of their latest_time: "value (count - date)".
#
# @param node [#value, #children, #count, #latest_time, nil] subtree root;
#   nil (e.g. an empty tree) prints nothing
# @param depth [Integer] indentation level of +node+
def print_tree(node, depth = 0)
  return if node.nil?

  details = node.children.empty? ? " (#{node.count} - #{node.latest_time&.to_date})" : ''
  puts "#{"\t" * depth}#{node.value}#{details}"
  node.children.each { |child| print_tree(child, depth + 1) }
end
# Print the URL tree to stdout, one tab-indented line per node.
print_tree(root)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment