Skip to content

Instantly share code, notes, and snippets.

@ijharulislam
Created October 19, 2017 08:54
Show Gist options
  • Save ijharulislam/ae9cb7f5bbeb7a8769da5da86dcd2d0a to your computer and use it in GitHub Desktop.
Save ijharulislam/ae9cb7f5bbeb7a8769da5da86dcd2d0a to your computer and use it in GitHub Desktop.
def parse_dir_contents(self, response):
    """Scrape one medicine detail page into a single ``CzechItem``.

    Each ``<tr>`` of the table under the element with id ``medicine-box`` is
    inspected: the first text node of its ``<th>`` is looked up in a fixed
    label-to-field table and, on an exact match, the first text node of the
    row's ``<td>`` is stored in the item under the mapped field name. Rows
    whose label is missing or not in the table are ignored.

    Args:
        response: Object exposing ``xpath()`` — presumably a Scrapy response
            for the detail page; confirm against the caller.

    Yields:
        One ``CzechItem`` containing whichever fields were found on the page.
    """
    # Row label (exact <th> text) -> item field name.
    # NOTE(review): 'Strenght' is kept exactly as in the original; it
    # presumably mirrors the label on the page and the field declared on
    # CzechItem — confirm before "correcting" the spelling.
    fields_by_label = {
        'SÚKL code': 'sukl_code',
        'Name of the product': 'product_name',
        '_MA_MEDICATION_DETAIL_INFO_MED_NAME': 'med_info_detail',
        'Supplement': 'supplement',
        'Strenght': 'strenght',
        'Pharmaceutical form': 'pharmaceutical_form',
        'Package': 'package',
        'Route': 'route',
        'Language of the pack': 'language_of_the_pack',
        'Wrap type': 'wrap_type',
        'Legal status': 'legal_status',
        'Active substance': 'active_substance',
        'ATC group': 'atc_group',
        'ATC group name': 'atc_group_name',
    }

    item = CzechItem()
    for row in response.xpath('//*[@id="medicine-box"]/table//tr'):
        label = row.xpath('th/text()').extract_first()
        field = fields_by_label.get(label)
        if field is None:
            # Header-less row or a label this spider does not collect.
            continue
        item[field] = row.xpath('td/text()').extract_first()

    # Emit the item once, after every row has contributed its field. The
    # original yielded the undefined name `ite`, which raises NameError.
    yield item
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment