Last active
May 19, 2020 19:29
-
-
Save ryonlife/e9131d8ac2c9ce1d089a98eb346905e7 to your computer and use it in GitHub Desktop.
scrapy-autointegration
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import os | |
import shutil | |
import time | |
import yaml | |
from pegbot.utils import get_project_root | |
class AutointegrationPipeline:
    """
    Pipeline for generating autointegration test and fixture files.

    Both hooks do nothing unless the ``AUTOUNIT_ENABLED`` setting is true, so
    integration tests are generated on the same crawl as autounit ones.
    """

    def open_spider(self, spider):  # pylint: disable=no-self-use
        """
        Create a fresh directory and test files for the spider being run.

        Any existing ``autointegration/tests/<spider.name>`` directory under
        the project root is deleted and recreated, then an empty
        ``__init__.py`` and a ``test_<spider.name>.py`` module are written
        into it.
        """
        if not spider.settings.getbool('AUTOUNIT_ENABLED', default=False):
            # Piggyback off autounit settings so integration tests are generated on the same crawl
            return

        # Directory
        path = f'{get_project_root()}/autointegration/tests/{spider.name}'
        if os.path.exists(path):
            shutil.rmtree(path)
        # makedirs rather than mkdir: the parent "tests" directory may not exist yet
        os.makedirs(path)

        # Module file (intentionally empty)
        with open(f'{path}/__init__.py', 'w', encoding='utf-8'):
            pass

        # Test file
        test = ''.join([
            "# -*- coding: utf-8 -*-\n",
            "from autointegration.generate_test import generate_test\n",
            f"def test_{spider.name}():\n",
            f"\tgenerate_test('{spider.name}')()\n",
        ])
        with open(f'{path}/test_{spider.name}.py', 'w', encoding='utf-8') as handle:
            handle.write(test)

    def process_item(self, product, spider):  # pylint: disable=no-self-use
        """
        Create new fixture files.

        Writes one YAML fixture per item, holding the item's ``url`` and the
        ``category`` and ``name`` fields expected for it. Returns ``product``
        unchanged so later pipelines still receive it.

        Raises ``KeyError`` if the item lacks ``url``, ``category`` or ``name``.
        """
        if not spider.settings.getbool('AUTOUNIT_ENABLED', default=False):
            # Piggyback off autounit settings so integration tests are generated on the same crawl
            return product

        path = f'{get_project_root()}/autointegration/tests/{spider.name}'

        # Serialize before touching the filesystem so an item with a missing
        # key does not leave an empty fixture file behind.
        document = yaml.dump({
            'url': product['url'],
            'product': {
                'category': product['category'],
                'name': product['name'],
            },
        })

        # Several items can be scraped within the same second; a name based on
        # the timestamp alone would silently overwrite the earlier fixture.
        # Exclusive-create mode ('x') detects the clash and a numeric suffix
        # resolves it.
        stamp = int(time.time())
        suffix = 0
        while True:
            name = f'fixture_{stamp}_{suffix}.yaml' if suffix else f'fixture_{stamp}.yaml'
            try:
                handle = open(f'{path}/{name}', 'x', encoding='utf-8')
            except FileExistsError:
                suffix += 1
                continue
            with handle:
                handle.write(document)
            break

        return product
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import os | |
import yaml | |
from scrapy import signals | |
from scrapy.crawler import CrawlerProcess | |
from scrapy.utils.project import get_project_settings | |
from w3lib.url import canonicalize_url | |
def generate_test(spider_name):
    """Generates an integration test of a VendorSpider subclass.

    Args:
        spider_name: Name of the spider to crawl. It also names the directory
            under ``tests/`` (next to this module) that holds the spider's
            ``.yaml`` fixtures.

    Returns:
        A zero-argument callable that runs the crawl against the fixture URLs
        and raises ``AssertionError`` if the crawler's stats report any logged
        errors.
    """
    def _test():
        """Performs an integration test of a VendorSpider subclass."""
        # Configure the crawler
        settings = get_project_settings()
        # Fixture generation is keyed off this setting; keep it off so the
        # test crawl only reads fixtures.
        settings['AUTOUNIT_ENABLED'] = False
        process = CrawlerProcess(settings)
        crawler = process.create_crawler(spider_name)

        # Load fixtures from .yaml files, keyed by canonical URL
        fixtures = {}
        path = f'{os.path.dirname(os.path.abspath(__file__))}/tests/{spider_name}'
        for file_name in os.listdir(path):
            if not file_name.endswith('.yaml'):
                continue
            with open(os.path.join(path, file_name), encoding='utf-8') as handle:
                fixture = yaml.load(handle, Loader=yaml.FullLoader)
            fixtures[canonicalize_url(fixture['url'])] = fixture['product']

        def _test_parse_product(item):
            """Tests for correct scraping of product info."""
            # Fixture keys are canonicalized, so the scraped URL must be
            # canonicalized the same way before the lookup.
            url = canonicalize_url(item['url'])
            if url not in fixtures:
                raise AssertionError(f"Product URL mismatch: {url}")
            for key, val in fixtures[url].items():
                # Explicit raise instead of ``assert`` so the check survives
                # ``python -O`` and reports what differed.
                if not item[key] == val:
                    raise AssertionError(
                        f"Field {key!r} mismatch for {url}: "
                        f"expected {val!r}, got {item[key]!r}"
                    )

        # Attach test handlers to various event signals
        # https://docs.scrapy.org/en/latest/topics/signals.html#topics-signals-ref
        crawler.signals.connect(_test_parse_product, signal=signals.item_scraped)

        # Run the crawler
        process.crawl(crawler, seed_urls=list(fixtures), crawl_patterns=[])
        process.start()

        # Integration test fails if errors have been counted in the crawler's stats
        error_count = crawler.stats.get_value('log_count/ERROR')
        if error_count:
            raise AssertionError(
                f"{spider_name}: {error_count} error(s) logged during the crawl"
            )

    return _test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from autointegration.generate_test import generate_test | |
def test_name_of_spider():
    """Run the generated integration test for the ``name_of_spider`` spider."""
    # The argument is the spider's own name, without the ``test_`` prefix used
    # for this function.
    generate_test('name_of_spider')()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment