chrisdietr · October 21, 2019 07:06
diff --git a/scrapy: login, go to directory page with pagination, there. store information from a linked page. b/scrapy: login, go to directory page with pagination, there. store information from a linked page.
 # -*- coding: utf-8 -*-
 import scrapy
 from scrapy.http import FormRequest, Request


 class QuotesSpider(scrapy.Spider):
    """Log in, walk the paginated listing and scrape the linked "(about)" pages.

    Flow of callbacks:
    ``parse`` (submit login form) -> ``go_to_directory`` (open the listing)
    -> ``after_login`` (follow "(about)" links and pagination)
    -> ``parse_items`` (emit one item per linked page).
    """

    name = 'quoteslogin'
    allowed_domains = ['quotes.toscrape.com']
    start_urls = [
        'http://quotes.toscrape.com/login'
    ]
    # First listing page requested once the login form has been submitted.
    directory_url = 'http://quotes.toscrape.com/page/1/'

    def parse(self, response):
        """Submit the login form found on the start URL.

        ``FormRequest.from_response`` pre-populates the request with the
        fields already present in the HTML form (hidden inputs such as
        ``csrf_token`` included), so only the credentials are supplied here.
        """
        yield FormRequest.from_response(
            response,
            formxpath="//form",
            formdata={
                'username': 'admin',
                'password': 'admin',
            },
            callback=self.go_to_directory,
        )

    def go_to_directory(self, response):
        """Request the listing page once the login response has arrived.

        Uses ``directory_url`` instead of a hard-coded page so the crawl
        starts at the first listing page and no earlier pages are skipped.
        """
        return Request(url=self.directory_url, callback=self.after_login)

    def after_login(self, response):
        """Follow every "(about)" link on a listing page, then the next page."""
        for href in response.xpath('//a[contains(.,"(about)")]/@href'):
            yield response.follow(href, callback=self.parse_items)

        next_page = response.xpath("//li[@class='next']/a/@href").get()

        # The last listing page has no "next" link; pagination stops there.
        if next_page:
            yield response.follow(url=next_page, callback=self.after_login)

    def parse_items(self, response):
        """Emit the text of the ``author-born-date`` span of a linked page.

        NOTE(review): the key is 'year' but the whole text of the span is
        stored; confirm whether only the year part is wanted.
        """
        yield {
            # Select the text node: selecting the <span> element itself would
            # return its serialized HTML markup rather than the date text.
            'year': response.xpath(
                '//span[@class="author-born-date"]/text()'
            ).get()
        }
	# -*- coding: utf-8 -*-
	import scrapy
	from scrapy.http import FormRequest, Request


	class QuotesSpider(scrapy.Spider):
		"""Log in, walk the paginated listing and scrape the linked "(about)" pages.

		Flow of callbacks:
		``parse`` (submit login form) -> ``go_to_directory`` (open the listing)
		-> ``after_login`` (follow "(about)" links and pagination)
		-> ``parse_items`` (emit one item per linked page).
		"""

		name = 'quoteslogin'
		allowed_domains = ['quotes.toscrape.com']
		start_urls = [
			'http://quotes.toscrape.com/login'
		]
		# First listing page requested once the login form has been submitted.
		directory_url = 'http://quotes.toscrape.com/page/1/'

		def parse(self, response):
			"""Submit the login form found on the start URL.

			``FormRequest.from_response`` pre-populates the request with the
			fields already present in the HTML form (hidden inputs such as
			``csrf_token`` included), so only the credentials are supplied here.
			"""
			yield FormRequest.from_response(
				response,
				formxpath="//form",
				formdata={
					'username': 'admin',
					'password': 'admin',
				},
				callback=self.go_to_directory,
			)

		def go_to_directory(self, response):
			"""Request the listing page once the login response has arrived.

			Uses ``directory_url`` instead of a hard-coded page so the crawl
			starts at the first listing page and no earlier pages are skipped.
			"""
			return Request(url=self.directory_url, callback=self.after_login)

		def after_login(self, response):
			"""Follow every "(about)" link on a listing page, then the next page."""
			for href in response.xpath('//a[contains(.,"(about)")]/@href'):
				yield response.follow(href, callback=self.parse_items)

			next_page = response.xpath("//li[@class='next']/a/@href").get()

			# The last listing page has no "next" link; pagination stops there.
			if next_page:
				yield response.follow(url=next_page, callback=self.after_login)

		def parse_items(self, response):
			"""Emit the text of the ``author-born-date`` span of a linked page.

			NOTE(review): the key is 'year' but the whole text of the span is
			stored; confirm whether only the year part is wanted.
			"""
			yield {
				# Select the text node: selecting the <span> element itself would
				# return its serialized HTML markup rather than the date text.
				'year': response.xpath(
					'//span[@class="author-born-date"]/text()'
				).get()
			}