ijharulislam · October 2, 2017 12:34
diff --git a/gistfile1.txt b/gistfile1.txt
 # -*- coding: utf-8 -*-
 import scrapy

 from nafdac.items import NafdacItem


 class NafDacCrawlerSpider(scrapy.Spider):
    """Crawl the paginated NAFDAC registered-drugs listing.

    One request is issued per listing offset; every table row that has a
    product name is printed and yielded as a plain ``dict`` item.
    """

    name = 'nafdac'

    # Listing endpoint; ``{}`` receives the row offset of the page to fetch.
    LISTING_URL = (
        'http://www.nafdac.gov.ng/index.php/product-registration/'
        'registered-drugs?resetfilters=0&limitstart10={}'
    )
    # Exclusive upper bound of the offsets that are requested.
    # NOTE(review): presumably the record count when this was written - confirm.
    TOTAL_RECORDS = 20702
    # Distance between two consecutive offsets (presumably rows per page).
    PAGE_SIZE = 10

    def start_requests(self):
        """Yield one ``scrapy.Request`` per listing offset.

        Offsets run from 0 up to (but excluding) ``TOTAL_RECORDS`` in steps
        of ``PAGE_SIZE``; each response is handled by :meth:`parse`.
        """
        for offset in range(0, self.TOTAL_RECORDS, self.PAGE_SIZE):
            url = self.LISTING_URL.format(offset)
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Yield one item per product row of the listing table.

        Args:
            response: The downloaded listing page.

        Yields:
            dict: ``product_type``, ``product_name``, ``active_ingredients``
            and ``manufacturer``, taken from the first text node of the
            row's 1st to 4th ``<td>``. A value is ``None`` when the cell
            has no text node. Rows without a product name are skipped.
        """
        rows = response.xpath('//*[@id="list_10_com_fabrik_10"]/tbody//tr')
        for row in rows:
            product_name = row.xpath("td[2]/text()").extract_first()
            if not product_name:
                # No product name means there is no record on this row.
                continue

            # extract_first() returns None when the cell has no text node,
            # so only strip when there is something to strip.
            product_type = row.xpath("td[1]/text()").extract_first()
            if product_type is not None:
                product_type = product_type.strip()
            active_ingredients = row.xpath("td[3]/text()").extract_first()
            manufacturer = row.xpath("td[4]/text()").extract_first()

            print("product_type: {}, product_name: {}, active_ingredients:{}, manufacturer:{}".format(product_type, product_name, active_ingredients, manufacturer))

            # The callback has to yield the record, otherwise Scrapy never
            # sees it. A plain dict is used because the fields declared on
            # NafdacItem are not visible from this file.
            yield {
                "product_type": product_type,
                "product_name": product_name,
                "active_ingredients": active_ingredients,
                "manufacturer": manufacturer,
            }
	# -*- coding: utf-8 -*-
	import scrapy

	from nafdac.items import NafdacItem


	class NafDacCrawlerSpider(scrapy.Spider):
	    """Crawl the paginated NAFDAC registered-drugs listing.

	    One request is issued per listing offset; every table row that has a
	    product name is printed and yielded as a plain ``dict`` item.
	    """

	    name = 'nafdac'

	    # Listing endpoint; ``{}`` receives the row offset of the page to fetch.
	    LISTING_URL = (
	        'http://www.nafdac.gov.ng/index.php/product-registration/'
	        'registered-drugs?resetfilters=0&limitstart10={}'
	    )
	    # Exclusive upper bound of the offsets that are requested.
	    # NOTE(review): presumably the record count when this was written - confirm.
	    TOTAL_RECORDS = 20702
	    # Distance between two consecutive offsets (presumably rows per page).
	    PAGE_SIZE = 10

	    def start_requests(self):
	        """Yield one ``scrapy.Request`` per listing offset.

	        Offsets run from 0 up to (but excluding) ``TOTAL_RECORDS`` in steps
	        of ``PAGE_SIZE``; each response is handled by :meth:`parse`.
	        """
	        for offset in range(0, self.TOTAL_RECORDS, self.PAGE_SIZE):
	            url = self.LISTING_URL.format(offset)
	            yield scrapy.Request(url=url, callback=self.parse)

	    def parse(self, response):
	        """Yield one item per product row of the listing table.

	        Args:
	            response: The downloaded listing page.

	        Yields:
	            dict: ``product_type``, ``product_name``, ``active_ingredients``
	            and ``manufacturer``, taken from the first text node of the
	            row's 1st to 4th ``<td>``. A value is ``None`` when the cell
	            has no text node. Rows without a product name are skipped.
	        """
	        rows = response.xpath('//*[@id="list_10_com_fabrik_10"]/tbody//tr')
	        for row in rows:
	            product_name = row.xpath("td[2]/text()").extract_first()
	            if not product_name:
	                # No product name means there is no record on this row.
	                continue

	            # extract_first() returns None when the cell has no text node,
	            # so only strip when there is something to strip.
	            product_type = row.xpath("td[1]/text()").extract_first()
	            if product_type is not None:
	                product_type = product_type.strip()
	            active_ingredients = row.xpath("td[3]/text()").extract_first()
	            manufacturer = row.xpath("td[4]/text()").extract_first()

	            print("product_type: {}, product_name: {}, active_ingredients:{}, manufacturer:{}".format(product_type, product_name, active_ingredients, manufacturer))

	            # The callback has to yield the record, otherwise Scrapy never
	            # sees it. A plain dict is used because the fields declared on
	            # NafdacItem are not visible from this file.
	            yield {
	                "product_type": product_type,
	                "product_name": product_name,
	                "active_ingredients": active_ingredients,
	                "manufacturer": manufacturer,
	            }