Skip to content

Instantly share code, notes, and snippets.

@ijharulislam
Created October 2, 2017 12:34
Show Gist options
  • Save ijharulislam/e016d29d53023936bc6bc5285aaa0d98 to your computer and use it in GitHub Desktop.
Save ijharulislam/e016d29d53023936bc6bc5285aaa0d98 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import scrapy
from nafdac.items import NafdacItem
class NafDacCrawlerSpider(scrapy.Spider):
    """Spider that walks the paginated NAFDAC registered-drugs listing.

    ``start_requests`` issues one request per listing offset and ``parse``
    turns every table row that carries a product name into a record.
    """

    name = 'nafdac'

    def start_requests(self):
        """Yield one ``scrapy.Request`` per listing offset.

        Offsets run 0, 10, 20, ... up to (but excluding) 20702 and are
        substituted into the ``limitstart10`` query parameter.
        NOTE(review): presumably the site shows 10 rows per page and had
        about 20702 entries when this was written -- confirm before reuse.
        """
        for i in range(0, 20702, 10):
            url = 'http://www.nafdac.gov.ng/index.php/product-registration/registered-drugs?resetfilters=0&limitstart10={}'.format(i)
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Extract one record per table row of a listing page.

        Reads the first text node of the first four ``td`` cells of each row
        under the ``list_10_com_fabrik_10`` table body. Rows without a
        product name are skipped. Each kept row is printed and yielded as a
        dict with the keys ``product_type``, ``product_name``,
        ``active_ingredients`` and ``manufacturer``; a missing cell text is
        reported as ``None``.
        """
        rows = response.xpath('//*[@id="list_10_com_fabrik_10"]/tbody//tr')
        for row in rows:
            product_type = row.xpath("td[1]/text()").extract_first()
            if product_type is not None:
                # extract_first() returns None when the cell has no text
                # node; stripping unconditionally would raise AttributeError
                # and abort the rest of the page.
                product_type = product_type.strip()
            product_name = row.xpath("td[2]/text()").extract_first()
            active_ingredients = row.xpath("td[3]/text()").extract_first()
            manufacturer = row.xpath("td[4]/text()").extract_first()

            if not product_name:
                continue

            output = {
                "product_type": product_type,
                "product_name": product_name,
                "active_ingredients": active_ingredients,
                "manufacturer": manufacturer,
            }
            print("product_type: {}, product_name: {}, active_ingredients:{}, manufacturer:{}".format(product_type, product_name, active_ingredients, manufacturer))
            # Hand the record to Scrapy so feed exports and pipelines see it;
            # printing alone discards the scraped data.
            yield output
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment