Created
May 6, 2024 09:02
-
-
Save georgel/7177681813ad4bd213b6156deb1aaf0a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio | |
import logging | |
from pathlib import Path | |
from urllib.parse import urlparse | |
import click | |
from playwright.async_api import async_playwright | |
@click.command()
@click.option("--url", default="https://nytimes.com", help="The URL to screenshot")
@click.option(
    "--output",
    type=click.Path(),
    default=".",
    help="The output directory for the HTML and PNG files",
)
def shot(url: str, output: str) -> None:
    """Save an annotated HTML snapshot and a PNG screenshot of a web page.

    Command-line entry point: configures INFO-level logging, logs the
    target URL, and runs the async ``screenshot`` coroutine to completion.

    Args:
        url: Address of the page to capture.
        output: Directory that receives the generated files.
    """
    logging.basicConfig(level=logging.INFO)
    # Lazy %-style args: the message is only formatted if the record is emitted.
    logging.info("Screenshotting %s", url)
    asyncio.run(screenshot(url, output))
async def screenshot(url: str, output: str) -> None:
    """Capture ``url`` as an annotated HTML file plus a PNG screenshot.

    The page is opened in Chromium using Playwright's 'iPhone 13' device
    descriptor. An injected script walks every element, and for each one
    that overlaps the viewport and is larger than 1x1 px it:

    * sets a random UUID as the element's ``data-webtasks-id`` attribute, and
    * records the element's bounding client rect under that UUID.

    The UUID -> rect map is serialized as JSON into a ``<div id="bbox">``
    appended to ``<body>``. The resulting document is written to
    ``<output>/<netloc>.html`` and a screenshot to ``<output>/<netloc>.png``,
    where ``<netloc>`` is the network-location part of ``url``.

    Args:
        url: Address of the page to capture.
        output: Directory for the generated files; created if missing.
    """
    out_dir = Path(output)
    domain = urlparse(url).netloc

    async with async_playwright() as p:
        browser = await p.chromium.launch()
        try:
            context = await browser.new_context(**p.devices["iPhone 13"])
            page = await context.new_page()
            await page.goto(url)

            # Tag visible, in-viewport elements and embed their bounding
            # boxes in the DOM so they are part of the saved HTML.
            await page.add_script_tag(content="""
                var elementToBoundingBox = {};
                document.querySelectorAll('*').forEach(element => {
                    const boundingBox = element.getBoundingClientRect();
                    const { innerHeight, innerWidth } = window;
                    const isInViewport = !(boundingBox.top > innerHeight || boundingBox.bottom < 0 || boundingBox.left > innerWidth || boundingBox.right < 0);
                    const isVisible = boundingBox.width > 1 && boundingBox.height > 1;
                    if (isInViewport && isVisible) {
                        let myuuid = crypto.randomUUID();
                        elementToBoundingBox[myuuid] = boundingBox;
                        element.setAttribute('data-webtasks-id', myuuid);
                    }
                });
                var elementString = JSON.stringify(elementToBoundingBox, null, 4);
                var divElement = document.createElement('div');
                divElement.innerHTML = elementString;
                divElement.id = 'bbox';
                document.body.appendChild(divElement);
            """)
            content = await page.evaluate("document.documentElement.outerHTML")

            # Create the target directory on demand so a fresh --output path works.
            out_dir.mkdir(parents=True, exist_ok=True)

            # Explicit UTF-8: page markup routinely contains characters that
            # the platform default encoding cannot represent.
            output_file = out_dir / f"{domain}.html"
            output_file.write_text(content, encoding="utf-8")

            screenshot_path = out_dir / f"{domain}.png"
            await page.screenshot(path=str(screenshot_path))

            await context.close()
        finally:
            # Always release the browser, even if navigation, script
            # injection or file writing raised.
            await browser.close()
if __name__ == "__main__":
    # Invoke the click command only when run as a script, not on import.
    shot()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment