brcosm · August 26, 2011 02:08
diff --git a/pax.rb b/pax.rb
 # Builds the plist <array> element that lists a panel's panelists.
 #
 # div - String with the inner markup of a panelists div, e.g.
 #       "<i>Panelists include: Name [Note], Other Name [Note]</i>".
 #
 # Returns a String: a tab-indented <array> holding one <string> per
 # comma-separated panelist, with no trailing newline.
 def get_panelist_array(div)
   # Remove each bracketed note on its own. A greedy /\[.*\]/ would span
   # from the first "[" to the last "]" and swallow every name in between.
   names = div.gsub(/\[[^\]]*\]/, "")
              .gsub("<i>Panelists include: ", "")
              .gsub("</i>", "")
              .split(", ")

   entries = names.map { |name| "\t\t\t<string>#{name.strip}</string>\n" }
   "\t\t<array>\n#{entries.join}\t\t</array>"
 end

 # Converts a 12-hour clock time such as "9:30am" or "12:05pm" into the
 # 24-hour "HH:MM:00" form.
 #
 # time - String whose last two characters are "am" or "pm" (any case),
 #        e.g. "1:00pm". The string is not modified.
 #
 # Returns a String "HH:MM:00"; a missing minute part is treated as "00".
 # Raises ArgumentError if the hour part is not a decimal integer.
 def convert_to_iso(time)
   # Read slices instead of slice! so the caller's string is left intact.
   meridiem = time[-2..-1].to_s.downcase
   hour_text, minute_text = time[0...-2].split(":")

   # 12am is midnight (00) and 12pm is noon (12): fold 12 to 0 first, then
   # add the afternoon offset. Base 10 keeps "09" from being read as octal.
   hour = Integer(hour_text, 10) % 12
   hour += 12 if meridiem == "pm"

   format("%02d:%s:00", hour, minute_text.to_s.rjust(2, "0"))
 end

 # Turns a "<weekday> <clock time>" string into a "date T time" stamp.
 #
 # dt_string - String split on spaces; the first word is the weekday and
 #             the second is the clock time handed to convert_to_iso.
 #
 # Returns a String "<date>T<converted time>". "Friday" maps to 2011-08-26,
 # "Saturday" to 2011-08-27, and any other first word to 2011-08-28.
 def replace_date(dt_string)
   weekday, clock = dt_string.split(" ")

   date = case weekday
          when "Friday"   then "2011-08-26"
          when "Saturday" then "2011-08-27"
          else                 "2011-08-28"
          end

   "#{date}T#{convert_to_iso(clock)}"
 end

 # Get the raw text data from the website. File.read closes the handle
 # itself, even when reading raises.
 data = File.read("events.txt")

 # Remove all of the line breaks
 data.gsub!("\n", "")

 # Create an array of divs by splitting on the close of the div tag
 data = data.split("</div>")

 # Regular expression that matches the opening div tag and grabs the class.
 # [^"]* stops at the attribute's closing quote; a greedy .* would run on to
 # the last `">` in the fragment and eat any markup in between.
 r = /<div class="(?<class>[^"]*)">/

 # rows_from_title counts divs since the most recent title div;
 # number_of_dicts counts the <dict> elements opened so far.
 rows_from_title = 0
 number_of_dicts = 0

 xml = '<?xml version="1.0" encoding="UTF-8"?>'
 xml << '<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">'
 xml << '<plist version="1.0">'
 xml << "<array>\n"

 data.each do |div|
   # Get the class name for this div. Fragments without an opening div tag
   # (e.g. text after the last </div>) are skipped instead of crashing on nil.
   tag = r.match(div)
   next if tag.nil?

   body = div.gsub(r, "")

   if tag["class"] == "faqhead"
     # This div is a title: close the previous event, then open a new one.
     xml << "\t</dict>\n" if number_of_dicts > 0
     number_of_dicts += 1
     # reset the counter
     rows_from_title = 0
     xml << "\t<dict>\n\t\t<key>title</key>\n\t\t<string>#{body}</string>\n"
   elsif rows_from_title == 1
     # This is the location and date, separated by <br>
     location, date_text = body.split("<br>")
     xml << "\t\t<key>location</key>\n\t\t<string>#{location}</string>\n"
     xml << "\t\t<key>date</key>\n\t\t<date>#{replace_date(date_text)}</date>\n"
   elsif rows_from_title == 2
     summary = body.gsub("<br>", "\n").strip
     xml << "\t\t<key>summary</key>\n\t\t<string>#{summary}</string>\n"
   else
     xml << "\t\t<key>panelists</key>\n#{get_panelist_array(body)}\n"
   end

   rows_from_title += 1
 end

 # The loop only closes a dict when the next title shows up, so the final
 # event's dict has to be closed here to keep the plist well-formed.
 xml << "\t</dict>\n" if number_of_dicts > 0
 xml << "</array>\n</plist>"
 puts xml
	# Builds the plist <array> element that lists a panel's panelists.
	#
	# div - String with the inner markup of a panelists div, e.g.
	#       "<i>Panelists include: Name [Note], Other Name [Note]</i>".
	#
	# Returns a String: a tab-indented <array> holding one <string> per
	# comma-separated panelist, with no trailing newline.
	def get_panelist_array(div)
	  # Remove each bracketed note on its own. A greedy /\[.*\]/ would span
	  # from the first "[" to the last "]" and swallow every name in between.
	  names = div.gsub(/\[[^\]]*\]/, "")
	             .gsub("<i>Panelists include: ", "")
	             .gsub("</i>", "")
	             .split(", ")

	  # Block parameters are written with plain pipes; backslash-escaped
	  # pipes are not valid Ruby.
	  entries = names.map { |name| "\t\t\t<string>#{name.strip}</string>\n" }
	  "\t\t<array>\n#{entries.join}\t\t</array>"
	end

	# Converts a 12-hour clock time such as "9:30am" or "12:05pm" into the
	# 24-hour "HH:MM:00" form.
	#
	# time - String whose last two characters are "am" or "pm" (any case),
	#        e.g. "1:00pm". The string is not modified.
	#
	# Returns a String "HH:MM:00"; a missing minute part is treated as "00".
	# Raises ArgumentError if the hour part is not a decimal integer.
	def convert_to_iso(time)
	  # Read slices instead of slice! so the caller's string is left intact.
	  meridiem = time[-2..-1].to_s.downcase
	  hour_text, minute_text = time[0...-2].split(":")

	  # 12am is midnight (00) and 12pm is noon (12): fold 12 to 0 first, then
	  # add the afternoon offset. Base 10 keeps "09" from being read as octal.
	  hour = Integer(hour_text, 10) % 12
	  hour += 12 if meridiem == "pm"

	  format("%02d:%s:00", hour, minute_text.to_s.rjust(2, "0"))
	end

	# Turns a "<weekday> <clock time>" string into a "date T time" stamp.
	#
	# dt_string - String split on spaces; the first word is the weekday and
	#             the second is the clock time handed to convert_to_iso.
	#
	# Returns a String "<date>T<converted time>". "Friday" maps to 2011-08-26,
	# "Saturday" to 2011-08-27, and any other first word to 2011-08-28.
	def replace_date(dt_string)
	  weekday, clock = dt_string.split(" ")

	  date = case weekday
	         when "Friday"   then "2011-08-26"
	         when "Saturday" then "2011-08-27"
	         else                 "2011-08-28"
	         end

	  "#{date}T#{convert_to_iso(clock)}"
	end

	# Get the raw text data from the website. File.read closes the handle
	# itself, even when reading raises.
	data = File.read("events.txt")

	# Remove all of the line breaks
	data.gsub!("\n", "")

	# Create an array of divs by splitting on the close of the div tag
	data = data.split("</div>")

	# Regular expression that matches the opening div tag and grabs the class.
	# [^"]* stops at the attribute's closing quote; a greedy .* would run on to
	# the last `">` in the fragment and eat any markup in between.
	r = /<div class="(?<class>[^"]*)">/

	# rows_from_title counts divs since the most recent title div;
	# number_of_dicts counts the <dict> elements opened so far.
	rows_from_title = 0
	number_of_dicts = 0

	xml = '<?xml version="1.0" encoding="UTF-8"?>'
	xml << '<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">'
	xml << '<plist version="1.0">'
	xml << "<array>\n"

	# Block parameters are written with plain pipes; backslash-escaped pipes
	# are not valid Ruby.
	data.each do |div|
	  # Get the class name for this div. Fragments without an opening div tag
	  # (e.g. text after the last </div>) are skipped instead of crashing on nil.
	  tag = r.match(div)
	  next if tag.nil?

	  body = div.gsub(r, "")

	  if tag["class"] == "faqhead"
	    # This div is a title: close the previous event, then open a new one.
	    xml << "\t</dict>\n" if number_of_dicts > 0
	    number_of_dicts += 1
	    # reset the counter
	    rows_from_title = 0
	    xml << "\t<dict>\n\t\t<key>title</key>\n\t\t<string>#{body}</string>\n"
	  elsif rows_from_title == 1
	    # This is the location and date, separated by <br>
	    location, date_text = body.split("<br>")
	    xml << "\t\t<key>location</key>\n\t\t<string>#{location}</string>\n"
	    xml << "\t\t<key>date</key>\n\t\t<date>#{replace_date(date_text)}</date>\n"
	  elsif rows_from_title == 2
	    summary = body.gsub("<br>", "\n").strip
	    xml << "\t\t<key>summary</key>\n\t\t<string>#{summary}</string>\n"
	  else
	    xml << "\t\t<key>panelists</key>\n#{get_panelist_array(body)}\n"
	  end

	  rows_from_title += 1
	end

	# The loop only closes a dict when the next title shows up, so the final
	# event's dict has to be closed here to keep the plist well-formed.
	xml << "\t</dict>\n" if number_of_dicts > 0
	xml << "</array>\n</plist>"
	puts xml