Created
October 19, 2017 08:54
-
-
Save ijharulislam/ae9cb7f5bbeb7a8769da5da86dcd2d0a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def parse_dir_contents(self, response):
    """Scrape one medicine detail page into a single ``CzechItem``.

    Walks the rows of the table directly under the ``#medicine-box``
    element. For each row, the ``<th>`` text is treated as the label and
    the first ``<td>`` text node as the value. Rows whose label is
    recognized are stored under the matching item field; rows with a
    missing or unrecognized label are ignored. If a label occurs more
    than once, the last occurrence wins.

    Args:
        response: Page response exposing ``xpath()``.
            NOTE(review): presumably a Scrapy response -- confirm
            against the spider that schedules this callback.

    Yields:
        One ``CzechItem`` holding every recognized field found on the page.
    """
    # Exact <th> text -> item field name. These strings must match the
    # page and the item declaration verbatim, so the 'Strenght' /
    # 'strenght' spelling is kept as-is on purpose.
    field_by_label = {
        'SÚKL code': 'sukl_code',
        'Name of the product': 'product_name',
        '_MA_MEDICATION_DETAIL_INFO_MED_NAME': 'med_info_detail',
        'Supplement': 'supplement',
        'Strenght': 'strenght',
        'Pharmaceutical form': 'pharmaceutical_form',
        'Package': 'package',
        'Route': 'route',
        'Language of the pack': 'language_of_the_pack',
        'Wrap type': 'wrap_type',
        'Legal status': 'legal_status',
        'Active substance': 'active_substance',
        'ATC group': 'atc_group',
        'ATC group name': 'atc_group_name',
    }

    item = CzechItem()
    for row in response.xpath('//*[@id="medicine-box"]/table//tr'):
        label = row.xpath('th/text()').extract_first()
        # ``label`` is None when the row has no <th> text; .get() then
        # simply finds no field and the row is skipped.
        field = field_by_label.get(label)
        if field is not None:
            item[field] = row.xpath('td/text()').extract_first()

    # NOTE(review): the pasted source lost its indentation, so the
    # original position of the yield is not visible. Because the item is
    # created once before the loop, it is emitted once after all rows
    # have been read -- confirm this matches the intended output.
    yield item
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment