Skip to content

Instantly share code, notes, and snippets.

@nyov
Created September 13, 2014 17:40
Show Gist options
  • Save nyov/ed79aecea88373ac0535 to your computer and use it in GitHub Desktop.
basic scrapy login
from scrapy import Spider
from scrapy.exceptions import CloseSpider
from scrapy.http import FormRequest, Request
from scrapy.selector import Selector
class MySpider(Spider):
    """Spider that authenticates before crawling its start URLs.

    Flow: ``start_requests`` fetches the login page, ``login`` locates the
    login form and submits the credentials, ``check_login`` verifies that
    the session is live and only then schedules ``start_urls`` for the
    regular ``parse`` callback.
    """

    name = ''
    allowed_domains = [
    ]
    start_urls = [
    ]

    # Login credentials and the URL of the login page -- fill these in.
    login_user = ''
    login_pass = ''
    login_url = ''

    # ... the usual (Rules or something)

    def start_requests(self):
        """Begin with the login page instead of crawling start_urls directly."""
        self.logger.info('Starting up with login...')
        return [Request(url=self.login_url, callback=self.login)]

    def login(self, response):
        """Find the login form on the page and submit the credentials.

        The input field names are scraped from the page itself so the
        spider keeps working if the site renames them.
        """
        self.logger.debug('Attempting to login')
        sel = Selector(response)
        loginform = sel.xpath('//form[@name="loginForm"]')  # your form name or whatever
        if not loginform:
            # Fail with a clear message instead of an IndexError below.
            raise CloseSpider('Login form not found')
        formname = loginform.xpath('@name').extract()[0]
        username = loginform.xpath('//input[@type="text"]/@name').extract()[0]
        password = loginform.xpath('//input[@type="password"]/@name').extract()[0]
        return [FormRequest.from_response(
            response,
            callback=self.check_login,
            # formxpath='//form[@name="loginForm"]',
            formname=formname,
            formdata={
                username: self.login_user,
                password: self.login_pass,
            },
        )]

    def check_login(self, response):
        """Verify the post-login page, then schedule the real crawl.

        NOTE(review): compares against response.text (unicode) -- the
        original used response.body, which is bytes on Python 3 and would
        raise TypeError on the ``in`` check.
        """
        if 'You are logged in // Welcome, Scraper // Whatever' not in response.text:
            self.logger.error('Login failed')
            self.logger.debug(response.text)
            raise CloseSpider('Login failed')
        self.logger.info('Logged in')
        # don't care for this response, switch URL now
        for url in self.start_urls:
            # dont_filter: the login redirect may already have visited these.
            yield Request(url, dont_filter=True)  # callback=self.parse

    def parse(self, response):
        # do something with logged in session
        # possibly re-check at some times, that we are still logged in
        pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment