Skip to content

Instantly share code, notes, and snippets.

@milesgrimshaw
Created August 6, 2014 11:47
Show Gist options
  • Save milesgrimshaw/818bfd7c1f8b762563dd to your computer and use it in GitHub Desktop.
Save milesgrimshaw/818bfd7c1f8b762563dd to your computer and use it in GitHub Desktop.
Script to collect and get routes for personal Citibike data
## API Structure
# {
# "name": "citibike_bay",
# "count": 8,
# "frequency": "realtime",
# "version": 1,
# "newdata": true,
# "lastrunstatus": "success",
# "lastsuccess": "Tue Jul 22 2014 21:50:19 GMT+0000 (UTC)",
# "results": {
# "collection1": [
# {
# "Start_Station": "E 25 St & 2 Ave",
# "Start_Time": "8/31/13 4:30:54 PM",
# "End_Station": "Broadway & W 24 St",
# "End_Time": "8/31/13 4:39:06 PM"
# },
# ....
# ]
# }
# }
## Required gems
require 'rest_client'
require 'pp'
require 'json'
require 'csv'
require 'time'
# require 'date'
## Keys and configuration
## String constants are frozen so they cannot be mutated in place by accident.
# Kimono API identifier; interpolated into the kimonolabs.com request path.
KIMONO_API = ''.freeze
# Kimono API key; sent as the `apikey` query parameter.
KIMONO_API_KEY = ''.freeze
# Kimono bearer token; sent in the `authorization` request header.
KIMONO_API_BEARER = ''.freeze
# Google API key; sent as the `key` query parameter of the Distance Matrix request.
GOOGLE_API_KEY = ''.freeze
# CSV file that the header and trip rows are appended to.
CSV_NAME = 'trips.csv'.freeze
# JSON document holding the station list; passed to create_stations_hash.
STATIONS_DOC = 'stations.json'.freeze
# Number of pages of trip data to request (pages 1..N are fetched).
NUMBER_PAGES_CITIBIKE_DATA = 13
## Convert a Citibike timestamp string (e.g. "8/31/13 4:30:54 PM")
## into a Time object.
##
## time - String in month/day/two-digit-year, 12-hour clock with AM/PM.
##
## Returns the parsed Time (Time.strptime raises ArgumentError when the
## string does not match the pattern).
def create_time_stamp(time)
  # Left-pad every run of digits to at least two characters before parsing,
  # so "8/31/13 4:30:54 PM" becomes "08/31/13 04:30:54 PM".
  padded = time.gsub(/\d+/) { |digits| '%02d' % digits.to_i }
  Time.strptime(padded, '%m/%d/%y %I:%M:%S %p')
end
## Build a lookup table from station name to coordinates.
##
## doc_name - path to a JSON file whose top-level "stationBeanList" key
##            holds an array of station objects, each with "stationName",
##            "latitude" and "longitude" entries.
##
## Returns a Hash of station name => [latitude, longitude].
## Raises if the file cannot be read or parsed, and KeyError when the
## "stationBeanList" key is absent.
def create_stations_hash(doc_name)
  ## Read the document named by the caller rather than a fixed path.
  ## File.read is used so the path is always treated as a plain file name.
  stations_list = JSON.parse(File.read(doc_name)).fetch("stationBeanList")

  ## Map each station name to its [lat, lon] pair; a later entry with the
  ## same name overwrites an earlier one.
  stations_list.each_with_object({}) do |station, stations_hash|
    stations_hash[station["stationName"]] = [station["latitude"], station["longitude"]]
  end
end
## Translate a known-bad Citibike station name into its corrected name.
##
## station - station name as it appears in the trip data.
##
## Returns the corrected name for the two names listed below, and the
## argument itself for anything else.
def catch_errors(station)
  renames = {
    "E 47 St & 2 Ave" => "Greenwich Ave & Charles St",
    "Lawrence St & Willoughby St" => "Lafayette St & Jersey St"
  }
  renames.fetch(station, station)
end
## Build the station name => [lat, lon] lookup once, up front.
stations_hash = create_stations_hash(STATIONS_DOC)

## Set up the csv with column titles.
## The header is written only when the file is missing or empty, so that
## re-running the script appends rows without repeating the header line.
unless File.size?(CSV_NAME)
  CSV.open(CSV_NAME, 'a') do |f|
    f << ["start_station", "start_station_lat", "start_station_lon", "start_time", "end_station", "end_station_lat", "end_station_lon", "end_time", "actual_duration", "estimated_duration", "estimated_distance"]
  end
end

## Iterate over all the pages of trip data on citibike's website
(1..NUMBER_PAGES_CITIBIKE_DATA).each do |page|
  pp "Collecting Page: #{page}"

  ## Get the data from the API and subset by the results
  kimono_response = RestClient.get(
    "https://www.kimonolabs.com/api/#{KIMONO_API}?apikey=#{KIMONO_API_KEY}&kimpath3=#{page}",
    { 'authorization' => "Bearer #{KIMONO_API_BEARER}" }
  )
  results = JSON.parse(kimono_response)["results"]["collection1"]

  results.each do |trip|
    begin
      ## Get the data wanted
      start_station = trip["Start_Station"]
      start_time = trip["Start_Time"]
      end_station = trip["End_Station"]
      end_time = trip["End_Time"]

      ## Catch naming errors in Citibike data
      end_station = catch_errors(end_station)
      start_station = catch_errors(start_station)

      ## Calculate the actual trip time (Time - Time yields seconds as a Float)
      actual_duration = create_time_stamp(end_time) - create_time_stamp(start_time)

      ## Turn station names into geocodes.
      ## An unknown station yields nil here; destructuring nil raises, which
      ## the rescue below reports before moving on to the next trip.
      start_station_lat, start_station_lon = *stations_hash.fetch(start_station)
      end_station_lat, end_station_lon = *stations_hash.fetch(end_station)

      ## Get Google Estimates For Cycling Time and Distance.
      ## Both default to 'NA' and keep that value if the lookup fails.
      ## NOTE(review): units are presumably metres and seconds per Google's
      ## Distance Matrix documentation -- confirm.
      estimated_distance = 'NA'
      estimated_duration = 'NA'
      begin
        google_response = RestClient.get("https://maps.googleapis.com/maps/api/distancematrix/json?origins=#{start_station_lat},#{start_station_lon}&destinations=#{end_station_lat},#{end_station_lon}&key=#{GOOGLE_API_KEY}&mode=bicycling")
        element = JSON.parse(google_response)['rows'][0]['elements'][0]
        estimated_distance = element['distance']['value']
        estimated_duration = element['duration']['value']
      rescue StandardError => e
        ## Best effort: the trip is still saved, with 'NA' for any estimate
        ## that was not obtained.
        pp "Google Distance Matrix Error (#{e.class}: #{e.message})"
      end

      ## Open the CSV and save the data.
      ## The file is reopened per row, so each row is written and closed
      ## before the next request is made.
      CSV.open(CSV_NAME, 'a') do |f|
        f << [start_station, start_station_lat, start_station_lon, start_time, end_station, end_station_lat, end_station_lon, end_time, actual_duration, estimated_duration, estimated_distance]
      end
    ## Catch an error if the row is missing a station etc.
    rescue StandardError => e
      pp "NOT COMPLETE TRIP DATA (#{e.class}: #{e.message})"
    end
  end

  ## Sometimes there are throttling issues with Kimono
  ## This is just a safety that solves this
  sleep 20
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment