Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save georgel/7177681813ad4bd213b6156deb1aaf0a to your computer and use it in GitHub Desktop.
Save georgel/7177681813ad4bd213b6156deb1aaf0a to your computer and use it in GitHub Desktop.
import asyncio
import logging
from pathlib import Path
from urllib.parse import urlparse
import click
from playwright.async_api import async_playwright
@click.command()
@click.option("--url", default="https://nytimes.com", help="The URL to screenshot")
@click.option(
    "--output",
    # file_okay=False rejects an existing regular file up front, instead of
    # failing later with an OS error when files are written "inside" it.
    type=click.Path(file_okay=False),
    default=".",
    help="The output directory for the HTML and PNG files",
)
def shot(url: str, output: str) -> None:
    """Save an annotated HTML snapshot and a screenshot of a web page."""
    # NOTE: click uses the docstring above as the command description in
    # --help, so it is kept short and user-facing.
    logging.basicConfig(level=logging.INFO)
    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logging.info("Screenshotting %s", url)
    # Bridge from click's synchronous entry point to the async worker.
    asyncio.run(screenshot(url, output))
async def screenshot(url: str, output: str) -> None:
    """Capture a page's annotated DOM and a screenshot with iPhone 13 emulation.

    Loads ``url`` in Chromium using Playwright's ``iPhone 13`` device
    descriptor, injects a script that tags every element that is both inside
    the viewport and larger than 1x1 px with a random ``data-webtasks-id``
    attribute, and appends a ``<div id="bbox">`` containing the JSON map from
    those ids to the elements' bounding boxes. The resulting DOM is written to
    ``<name>.html`` and a screenshot to ``<name>.png`` inside ``output``.

    Args:
        url: Address of the page to load.
        output: Directory that receives the two files; created if missing.
    """
    # Files are named after the URL's network location. URLs without one
    # (e.g. file://) would otherwise yield the hidden files ".html"/".png".
    stem = urlparse(url).netloc or "page"

    # Create the target directory before doing any browser work so a bad
    # path fails fast rather than after the page has been loaded.
    output_dir = Path(output)
    output_dir.mkdir(parents=True, exist_ok=True)

    async with async_playwright() as p:
        browser = await p.chromium.launch()
        try:
            context = await browser.new_context(**p.devices["iPhone 13"])
            page = await context.new_page()
            await page.goto(url)

            # Runs inside the page: annotate visible elements and embed
            # their bounding boxes into the DOM so they end up in the HTML.
            await page.add_script_tag(content="""
                var elementToBoundingBox = {};
                document.querySelectorAll('*').forEach(element => {
                    const boundingBox = element.getBoundingClientRect();
                    const { innerHeight, innerWidth } = window;
                    const isInViewport = !(boundingBox.top > innerHeight || boundingBox.bottom < 0 || boundingBox.left > innerWidth || boundingBox.right < 0);
                    const isVisible = boundingBox.width > 1 && boundingBox.height > 1;
                    if (isInViewport && isVisible) {
                        let myuuid = crypto.randomUUID();
                        elementToBoundingBox[myuuid] = boundingBox;
                        element.setAttribute('data-webtasks-id', myuuid);
                    }
                });
                var elementString = JSON.stringify(elementToBoundingBox, null, 4);
                var divElement = document.createElement('div');
                divElement.innerHTML = elementString;
                divElement.id = 'bbox';
                document.body.appendChild(divElement);
            """)

            content = await page.evaluate("document.documentElement.outerHTML")
            # Explicit UTF-8: the platform default encoding may be unable to
            # represent characters that appear in arbitrary web pages.
            (output_dir / f"{stem}.html").write_text(content, encoding="utf-8")

            await page.screenshot(path=str(output_dir / f"{stem}.png"))
            await context.close()
        finally:
            # Close the browser even when navigation, scripting or file
            # output raised.
            await browser.close()
# Run the click command only when executed as a script, not when imported.
if __name__ == "__main__":
    shot()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment