Created
October 28, 2011 18:05
-
-
Save kingcu/1322924 to your computer and use it in GitHub Desktop.
Charles Proxy dump parser for Tyler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#run this file by typing 'ruby tyler.rb' on your command line. | |
#make sure the tyler_garbage_data.txt file is in the same directory, | |
#and that you are also in that directory on your command line. | |
# | |
#A project for you would be to figure out how to pass the ruby
#script the garbage data file on the command line, so you can
#have it named whatever.
# | |
#Finally, to capture the output of this script, you'll want to
#execute it with something like
# 'ruby tyler.rb > output.txt'
#otherwise the output will just print on the screen and won't make
#it into a file. That's another good exercise: figure out how to use
#File.write() to write this data to a particular file.
#bring in the library for parsing JSON data - JSON is just another | |
#way to transfer data back and forth between computers, like XML. | |
# | |
#if you don't have it installed (there is an error) you'll need | |
#to do something like 'gem install json' on your command line | |
require 'json' | |
#holds every response body from the dump that parsed as JSON
json_data = []
#path of the dump to read: the first command-line argument if one was
#given (e.g. 'ruby tyler.rb my_dump.txt'), otherwise the original default
#file name in the current directory, so running with no argument behaves
#exactly as before
dump_path = ARGV[0] || "./tyler_garbage_data.txt"
#open the dump and scan it line by line. The "r:binary" mode matters:
#it tells ruby the file contains data other than plain text, so reading
#does not error on the non-text bytes (they appear to be image thumbnails)
#
#http://www.ruby-doc.org/core-1.9.2/File.html#method-c-open
#
File.open(dump_path, "r:binary") do |file|
  #state flag: true only while the line we are about to read is the one
  #that directly follows a "start of data" marker line
  next_is_data = false
  file.each do |line|
    #a marker line announces that the NEXT line is a response body;
    #remember that and move straight on to that next line
    if line =~ /Response-body:<<--EOF/i
      next_is_data = true
      next
    end
    if next_is_data
      begin
        #keep the body only if it parses as JSON
        json_data << JSON.parse(line)
      rescue StandardError
        #deliberately broad, best-effort skip: a large share of the
        #bodies are not JSON at all, so any failure here just means
        #"garbage line, ignore it"
      end
    end
    #whether the body was kept or skipped, the line after it is not data
    next_is_data = false
  end
end
#two buckets for the records we care about, one per value of the
#"type" attribute
categories = []
products = []
#walk every parsed response body and drop it into the matching bucket
#
#http://www.ruby-doc.org/core-1.9.2/Array.html#method-i-each
json_data.each do |js|
  #JSON.parse does not only return Hashes: a valid body such as "[]"
  #comes back as an Array (and some json versions also return nil or
  #bare scalars). Looking up "name" on those raises and would abort the
  #whole script, so anything that is not a Hash is skipped
  next unless js.is_a?(Hash)
  #only records that carry a "name" attribute are of interest
  next unless js["name"]
  #records with any other "type" value are ignored
  case js["type"]
  when "Product"
    products << js
  when "Category"
    categories << js
  end
end
#print every record, categories first and then products. Joining the two
#arrays into one list lets a single loop handle both kinds.
(categories + products).each do |record|
  #each record is a Hash; turn every key/value pair into one
  #"key:<tabs>value" string, leaving out the "images" entry because
  #there are a lot of images and they clutter the output
  #
  #http://www.ruby-doc.org/core-1.9.2/Hash.html#method-i-reject
  rows = record.reject { |field, _value| field == "images" }
               .map { |field, value| "#{field}:\t\t\t#{value}" }
  #print the collected strings, one per line
  #
  #http://www.ruby-doc.org/core-1.9.2/Array.html#method-i-join
  puts rows.join("\n")
  #blank line between records so they can be told apart; a dashed line
  #(puts "--------------------------") would work as a separator too
  puts "\n"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment