cosmocatalano · December 14, 2015 02:19
diff --git a/untappd_scrape.py b/untappd_scrape.py
 #!/usr/bin/python

 from bs4 import BeautifulSoup             #you will probably have to install this: http://www.crummy.com/software/BeautifulSoup/
 import urllib2
 import sys
 import cgitb
 import string
 import json

 #This takes a URL and turns it into BeautifulSoup object
 def make_soup(url):
 	"""Fetch ``url`` and return its body parsed as a BeautifulSoup object.

 	If fetching or parsing fails, a short message is printed and the
 	process is ended with sys.exit(); the function does not return then.
 	"""
 	try:
 		source = urllib2.urlopen(url).read()
 		soup = BeautifulSoup(source)
 		return soup
 	# Deliberately not a bare ``except:`` -- that would also swallow
 	# KeyboardInterrupt and SystemExit.
 	except Exception:
 		print("couldn't connect to source")
 		sys.exit()
    
 #CGI setup: errors are reported as plain text, and the header line (followed
 #by the blank line that the trailing \n produces) is the first thing printed.
 cgitb.enable(format='txt')
 print('Content-Type: text/plain\n')

 #Starting the connection. The opener is installed globally so that every
 #urllib2.urlopen() call -- including the ones made inside make_soup() -- sends
 #this User-agent; without install_opener() the header would only apply to
 #requests made through opener.open() directly.
 opener = urllib2.build_opener()
 opener.addheaders = [('User-agent', 'Mozilla/5.0')]
 urllib2.install_opener(opener)

 #Time-saving variables
 username = 'cosmocatalano'  #you'll probably want to change this
 u_url = 'http://untappd.com'
 u_user = u_url + '/user/' + username

 #Grabbing the data: one request, make_soup() does both the fetch and the parse
 search_soup = make_soup(u_user)
 result = search_soup.find('div', 'details')  #not .find_all because I'm just looking for the latest beer

 #All the links in the most recent <div class="details">; a very close match for API data.
 API_bits = result.find_all('a')

 #Timestamp
 timestamp = result.find('li', class_='timezoner')

 #Getting the page behind the fifth link of the latest check-in
 beer_page = make_soup(u_url + API_bits[4].get('href'))

 #Getting the latest image: prefer the photo, fall back to the icon.
 #A lookup that finds nothing yields None, so the chain below fails with
 #AttributeError/TypeError (or KeyError when the <img> has no src). Only those
 #trigger the fallback, so unrelated errors are not silently masked.
 try:
 	beer_image = beer_page.find('div', class_='photo')
 	image_url = beer_image.a.img['src']
 except (AttributeError, TypeError, KeyError):
 	beer_image = beer_page.find('span', class_='icon')
 	image_url = beer_image.a.img['src']
 				 
 #Getting the rating of the beer: the score is encoded as one of the CSS
 #classes (r05 ... r50) on the <span class="rating"> element.
 rating_span = beer_page.find('span', class_='rating')
 rating_classes = rating_span['class'] if rating_span is not None else []

 #A dictionary to map class to score
 score_value = {'r05': 0.5,
 	       'r10': 1,
 	       'r15': 1.5,
 	       'r20': 2,
 	       'r25': 2.5,
 	       'r30': 3,
 	       'r35': 3.5,
 	       'r40': 4,
 	       'r45': 4.5,
 	       'r50': 5 }

 #Pick whichever class is a known rating, wherever it sits in the class list.
 #my_score is None (null in the JSON output) when no rating class is present.
 my_score = next((score_value[css_class] for css_class in rating_classes
 		 if css_class in score_value), None)

 #Let's give our links some names, in the order the <a> tags appear
 count_to_name = ('user',
 		'beer',
 		'brewer',
 		'location',
 		'checkin',
 		'extra')  #spare name for when there are six <a> tags in <div class="details">

 #This dictionary will eventually become our API object
 scrape_obj = {'timestamp': timestamp.contents[0],
 	      'image' : image_url,
 	      'score' : my_score}

 #Pair every link from <div class="details"> with its name. zip() stops at the
 #shorter sequence, so links beyond the named ones are skipped instead of
 #raising IndexError. Each entry is [absolute URL, first child of the <a>].
 for link_name, bit in zip(count_to_name, API_bits):
 	scrape_obj[link_name] = [u_url + bit['href'], bit.contents[0]]

 #Turning it into a JSON object for you to use as you see fit.
 print(json.dumps(scrape_obj))
	#!/usr/bin/python

	from bs4 import BeautifulSoup #you will probably have to install this: http://www.crummy.com/software/BeautifulSoup/
	import urllib2
	import sys
	import cgitb
	import string
	import json

	#This takes a URL and turns it into BeautifulSoup object
	def make_soup(url):
		"""Fetch ``url`` and return its body parsed as a BeautifulSoup object.

		If fetching or parsing fails, a short message is printed and the
		process is ended with sys.exit(); the function does not return then.
		"""
		try:
			source = urllib2.urlopen(url).read()
			soup = BeautifulSoup(source)
			return soup
		# Deliberately not a bare ``except:`` -- that would also swallow
		# KeyboardInterrupt and SystemExit.
		except Exception:
			print("couldn't connect to source")
			sys.exit()

	#CGI setup: errors are reported as plain text, and the header line (followed
	#by the blank line that the trailing \n produces) is the first thing printed.
	cgitb.enable(format='txt')
	print('Content-Type: text/plain\n')

	#Starting the connection. The opener is installed globally so that every
	#urllib2.urlopen() call -- including the ones made inside make_soup() -- sends
	#this User-agent; without install_opener() the header would only apply to
	#requests made through opener.open() directly.
	opener = urllib2.build_opener()
	opener.addheaders = [('User-agent', 'Mozilla/5.0')]
	urllib2.install_opener(opener)

	#Time-saving variables
	username = 'cosmocatalano'  #you'll probably want to change this
	u_url = 'http://untappd.com'
	u_user = u_url + '/user/' + username

	#Grabbing the data: one request, make_soup() does both the fetch and the parse
	search_soup = make_soup(u_user)
	result = search_soup.find('div', 'details')  #not .find_all because I'm just looking for the latest beer

	#All the links in the most recent <div class="details">; a very close match for API data.
	API_bits = result.find_all('a')

	#Timestamp
	timestamp = result.find('li', class_='timezoner')

	#Getting the page behind the fifth link of the latest check-in
	beer_page = make_soup(u_url + API_bits[4].get('href'))

	#Getting the latest image: prefer the photo, fall back to the icon.
	#A lookup that finds nothing yields None, so the chain below fails with
	#AttributeError/TypeError (or KeyError when the <img> has no src). Only those
	#trigger the fallback, so unrelated errors are not silently masked.
	try:
		beer_image = beer_page.find('div', class_='photo')
		image_url = beer_image.a.img['src']
	except (AttributeError, TypeError, KeyError):
		beer_image = beer_page.find('span', class_='icon')
		image_url = beer_image.a.img['src']

	#Getting the rating of the beer: the score is encoded as one of the CSS
	#classes (r05 ... r50) on the <span class="rating"> element.
	rating_span = beer_page.find('span', class_='rating')
	rating_classes = rating_span['class'] if rating_span is not None else []

	#A dictionary to map class to score
	score_value = {'r05': 0.5,
		'r10': 1,
		'r15': 1.5,
		'r20': 2,
		'r25': 2.5,
		'r30': 3,
		'r35': 3.5,
		'r40': 4,
		'r45': 4.5,
		'r50': 5 }

	#Pick whichever class is a known rating, wherever it sits in the class list.
	#my_score is None (null in the JSON output) when no rating class is present.
	my_score = next((score_value[css_class] for css_class in rating_classes
		if css_class in score_value), None)

	#Let's give our links some names, in the order the <a> tags appear
	count_to_name = ('user',
		'beer',
		'brewer',
		'location',
		'checkin',
		'extra')  #spare name for when there are six <a> tags in <div class="details">

	#This dictionary will eventually become our API object
	scrape_obj = {'timestamp': timestamp.contents[0],
		'image' : image_url,
		'score' : my_score}

	#Pair every link from <div class="details"> with its name. zip() stops at the
	#shorter sequence, so links beyond the named ones are skipped instead of
	#raising IndexError. Each entry is [absolute URL, first child of the <a>].
	for link_name, bit in zip(count_to_name, API_bits):
		scrape_obj[link_name] = [u_url + bit['href'], bit.contents[0]]

	#Turning it into a JSON object for you to use as you see fit.
	print(json.dumps(scrape_obj))