Created
August 26, 2011 02:08
-
-
Save brcosm/1172527 to your computer and use it in GitHub Desktop.
Ruby script that converts PAX HTML schedule data into a .plist file for my iPhone app.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds the plist <array> element that lists a panel's panelists.
#
# div - String with the panelist line, already stripped of its <div> tag,
#       e.g. "<i>Panelists include: Ann [Host, Acme], Bob [Writer, Acme]</i>".
#
# Returns a String: a tab-indented <array> containing one <string> per
# non-empty panelist name, with no trailing newline.
def get_panelist_array(div)
  # Remove each bracketed aside on its own. A greedy /\[.*\]/ would run from
  # the first "[" to the last "]" and swallow every panelist in between.
  text = div.gsub(/\[[^\]]*\]/, "")
  text = text.gsub("<i>Panelists include: ", "").gsub("</i>", "")
  names = text.split(", ").map(&:strip).reject(&:empty?)
  entries = names.map { |name| "\t\t\t<string>#{name}</string>\n" }
  "\t\t<array>\n#{entries.join}\t\t</array>"
end
# Converts a 12-hour clock string such as "7:05pm" into "HH:MM:00" (24-hour).
#
# time - String of the form "H[:MM]am" or "H[:MM]pm" (case-insensitive,
#        surrounding whitespace allowed). The argument is not modified.
#
# Returns the String "HH:MM:00".
# Raises ArgumentError when the text is not a recognizable 12-hour time.
def convert_to_iso(time)
  parts = /\A\s*(\d{1,2})(?::(\d{1,2}))?\s*(am|pm)\s*\z/i.match(time.to_s)
  raise ArgumentError, "unrecognized time: #{time.inspect}" unless parts

  # Base 10 is explicit so that "08"/"09" are not rejected as invalid octal.
  # The modulo maps 12 to 0, which makes 12am -> 00 and 12pm -> 12 (not 24).
  hour = Integer(parts[1], 10) % 12
  hour += 12 if parts[3].downcase == "pm"
  minute = parts[2] ? Integer(parts[2], 10) : 0
  format("%02d:%02d:00", hour, minute)
end
# Turns a "<Weekday> <time> ..." string into an ISO-style timestamp.
#
# dt_string - String whose first space-separated word is the weekday and
#             whose second word is a clock time understood by convert_to_iso.
#
# Returns "YYYY-MM-DDTHH:MM:00". Friday and Saturday map to 2011-08-26 and
# 2011-08-27; any other first word maps to 2011-08-28.
def replace_date(dt_string)
  weekday, clock = dt_string.split(" ")
  calendar_day =
    case weekday
    when "Friday" then "2011-08-26"
    when "Saturday" then "2011-08-27"
    else "2011-08-28"
    end
  "#{calendar_day}T#{convert_to_iso(clock)}"
end
# Read the raw text data saved from the website; File.read opens and closes
# the file itself, so no handle is left open if something fails later.
data = File.read("events.txt")
# Remove all of the line breaks
data = data.gsub("\n", "")
# Create an array of divs by splitting on the close of the div tag
data = data.split("</div>")
# Regular expression that matches the div tag and grabs the class.
# [^"]* stops at the closing quote of the class attribute, so a later `">`
# inside the div's content (e.g. a link) is not pulled into the class name.
r = /<div class="(?<class>[^"]*)">/
# Number of divs seen since the current title; selects which key a div fills
rows_from_title = 0
# True once a <dict> has been started and still needs its closing tag
dict_open = false
xml = '<?xml version="1.0" encoding="UTF-8"?>'
xml << '<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">'
xml << '<plist version="1.0">'
xml << "<array>\n"
data.each do |div|
  # Get the class name for this div; fragments without an opening div tag
  # (such as text left over after the final </div>) carry no event data.
  tag = r.match(div)
  next unless tag
  is_title = tag["class"] == "faqhead"
  # Ignore anything that appears before the first title: there is no dict
  # for it to belong to yet.
  next unless is_title || dict_open
  body = div.gsub(r, "")
  if is_title
    # Close the previous event before starting a new one
    xml << "\t</dict>\n" if dict_open
    dict_open = true
    # reset the counter
    rows_from_title = 0
    # This div is a title
    key = "\t\t<key>title</key>"
    val = "\t\t<string>#{body}</string>"
    xml << "\t<dict>\n#{key}\n#{val}\n"
  elsif rows_from_title == 1
    # This is the location and date, separated by a <br>
    divs = body.split("<br>")
    key = "\t\t<key>location</key>"
    val = "\t\t<string>#{divs[0]}</string>"
    xml << "#{key}\n#{val}\n"
    key = "\t\t<key>date</key>"
    val = "\t\t<date>#{replace_date(divs[1])}</date>"
    xml << "#{key}\n#{val}\n"
  elsif rows_from_title == 2
    # This is the summary; HTML line breaks become real newlines
    key = "\t\t<key>summary</key>"
    val = "\t\t<string>#{body.gsub("<br>", "\n").strip}</string>"
    xml << "#{key}\n#{val}\n"
  else
    # Any later div is treated as the panelist list
    key = "\t\t<key>panelists</key>"
    val = get_panelist_array(body)
    xml << "#{key}\n#{val}\n"
  end
  rows_from_title += 1
end
# The loop only closes a dict when the next title arrives, so the last event
# must be closed here or the plist is malformed.
xml << "\t</dict>\n" if dict_open
xml << "</array>\n</plist>"
puts xml
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment