Skip to content

Instantly share code, notes, and snippets.

@arafsheikh
Created May 26, 2015 18:25
Show Gist options
  • Save arafsheikh/6ae88c6118090bc0e415 to your computer and use it in GitHub Desktop.
Save arafsheikh/6ae88c6118090bc0e415 to your computer and use it in GitHub Desktop.
Scraper written in Python using Mechanize and BeautifulSoup (4632700)
from mechanize import Browser
import mechanize
from bs4 import BeautifulSoup
def getRoll():
    """Advance the module-level ``base`` counter by one and return it as a string."""
    global base
    next_roll = base + 1
    base = next_roll
    return str(next_roll)
def main():
    """Scrape names from the CBSE class 12 (2015) results form into Name.txt.

    Opens the results page once, then loops: takes the next roll number from
    getRoll(), submits it through the first form on the page (field
    'regno'), and appends one line to Name.txt in the form
    ``roll : name : mother : father``.

    The loop has no exit condition of its own. It ends only when something
    raises (for example IndexError when a result page has fewer than 15
    <td> cells) or when the process is interrupted. The output file is
    closed in either case.
    """
    browser = Browser()
    # robots.txt handling is switched off and a desktop Firefox User-agent is
    # sent with every request.
    browser.set_handle_robots(False)
    browser.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    browser.open('http://cbseresults.nic.in/class12/cbse122015_all.htm')

    # 'with' guarantees Name.txt is flushed and closed when the loop is left
    # by an exception or Ctrl-C.
    with open('Name.txt', 'a') as f:
        while True:
            roll = getRoll()
            # Progress output; the parenthesised form prints the same text
            # under Python 2 and is also valid Python 3.
            print(roll)

            browser.select_form(nr=0)
            browser.form['regno'] = roll
            res = browser.submit()

            # NOTE(review): no parser is named, so bs4 picks one itself.
            # Naming one could shift the fixed cell indices below on
            # malformed HTML -- confirm against the live page before changing.
            soup = BeautifulSoup(res.read())
            cells = soup.find_all('td')

            # Fixed positions in the result page's <td> list; presumably the
            # candidate's name, mother's name and father's name.
            name = cells[10].text
            mother = cells[12].text
            father = cells[14].text

            f.write(' : '.join((roll, name, mother, father)) + '\n')

            # Return to the form page so select_form() works next iteration.
            browser.back()
# Roll-number counter read and advanced by getRoll(); the first roll tried is
# base + 1. Assigned at module level so getRoll() also works when this file
# is imported rather than run as a script.
base = 4632000

if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment