Skip to content

Instantly share code, notes, and snippets.

@tominsam
Created May 11, 2012 13:41
Show Gist options
  • Save tominsam/2659718 to your computer and use it in GitHub Desktop.
Save tominsam/2659718 to your computer and use it in GitHub Desktop.
crappy oyster data scraper
#!/usr/bin/env python
import mechanize
import datetime
import urllib
from bs4 import BeautifulSoup
##############
# Oyster account credentials -- replace both placeholders before running.
username = "fill me in"
password = "fill me in"
##############

# Log in: load the entry page, fill in its "sign-in" form and submit it.
start = "https://oyster.tfl.gov.uk/oyster/entry.do"
br = mechanize.Browser()
br.set_handle_robots(False)  # turn off mechanize's robots.txt handling
# Uncomment for mechanize's redirect / response / HTTP debug output:
# br.set_debug_redirects(True)
# br.set_debug_responses(True)
# br.set_debug_http(True)
br.open(start)
br.select_form(name="sign-in")
br["j_username"] = username
br["j_password"] = password
r = br.submit()
# Explicit check instead of `assert`: assertions are stripped when Python
# runs with -O, which would let a non-HTML response through unnoticed.
# This only verifies that the browser is looking at HTML; it does not by
# itself prove that the credentials were accepted.
if not br.viewing_html():
    raise RuntimeError(
        "Expected an HTML page after submitting the sign-in form"
    )
# Request the journey history for the last 30 days and parse the response.

# urlencode lives in different modules on Python 2 and Python 3; import it
# locally so this section works on either interpreter.
try:
    from urllib.parse import urlencode  # Python 3
except ImportError:
    from urllib import urlencode  # Python 2

# Read the clock once so both ends of the date range come from the same
# instant (two separate utcnow() calls could straddle midnight).
now = datetime.datetime.utcnow()
date_format = "%d/%m/%Y"

# Form fields posted to journeyHistory.do. Both spellings of the
# "isJSEnabledFor..." field are sent, exactly as before.
# NOTE(review): presumably the site's own form uses the misspelled
# "Pagenation" name -- confirm before removing either key.
postData = urlencode({
    "dateRange": "custom date range",
    "offset": "0",
    "rows": "100",
    "customDateRangeSel": "false",
    "isJSEnabledForPagenation": "false",
    "isJSEnabledForPagination": "false",
    "csDateFrom": (now - datetime.timedelta(days=30)).strftime(date_format),
    "csDateTo": now.strftime(date_format),
}).encode("ascii")  # urlencode output is pure ASCII; gives bytes on Python 3
r = br.open("https://oyster.tfl.gov.uk/oyster/journeyHistory.do", postData)
# Name the parser explicitly: without it BeautifulSoup picks whichever
# parser happens to be installed, so results can differ between machines.
soup = BeautifulSoup(r.read(), "html.parser")
# Walk the journey-history table and print every day heading followed by
# its journeys, one " - <time> <event>" line per journey.
journeyhistory = soup.find("table", {"class": "journeyhistory"})
if journeyhistory is None:
    # find() returns None when no matching table exists; fail with a clear
    # message instead of an AttributeError on None further down.
    raise RuntimeError(
        'No <table class="journeyhistory"> found in the response '
        "(the login may have failed or the page layout may have changed)"
    )

day = ""
# find_all(True, recursive=False) yields only the direct child *tags*,
# skipping the bare text nodes that sit between them.
for row in journeyhistory.find_all(True, recursive=False):
    td_day = row.find("td", {"class": "day-date status-1"})
    # NOTE(review): the pasted source had lost its indentation; the day
    # heading is assumed to be printed only for rows that carry a day cell.
    if td_day is not None:
        # The date text is taken from the cell's link when it has one,
        # otherwise from the cell itself.
        link = td_day.find("a")
        day = (link if link is not None else td_day).get_text().strip()
        print("===")
        print(day)

    for table in row.find_all("table", {"class": "revealed-table"}):
        for data_row in table.find_all("tr"):
            cells = data_row.find_all("td")
            if len(cells) < 2:
                # No time/event pair in this row (e.g. a header row made
                # of <th> cells); skip it rather than raise IndexError.
                continue
            time = cells[0].get_text().strip()
            event = cells[1].get_text().strip()
            print(" - %s %s" % (time, event))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment