Skip to content

Instantly share code, notes, and snippets.

@huynhbaoan
Created August 30, 2024 12:30
Show Gist options
  • Save huynhbaoan/7f4a90ac8a3489ed300f92e3205a2502 to your computer and use it in GitHub Desktop.
Save huynhbaoan/7f4a90ac8a3489ed300f92e3205a2502 to your computer and use it in GitHub Desktop.
Code to handle small tasks
from bs4 import BeautifulSoup
import csv
import requests
# Scrape an HTML table from a static page and export selected columns to
# 'output.csv'. For every table row except the first one, the text of the
# first three cells and the link target found in the seventh cell are written
# as one CSV record.

# URL of the static HTML page
url = 'https://example.com/static-page.html' # Replace with your actual URL

# Fetch the page content. A timeout is set because requests waits
# indefinitely by default if the server never answers.
response = requests.get(url, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # Open a CSV file to write the output. The encoding is pinned to UTF-8 so
    # non-ASCII cell text does not depend on the platform's default codec.
    with open('output.csv', 'w', newline='', encoding='utf-8') as csvfile:
        csvwriter = csv.writer(csvfile)

        # Write header row (optional)
        csvwriter.writerow(['Account ID', 'Environment', 'Description', 'URL'])

        # Loop through each table row (tr), discarding the first row
        for tr in soup.find_all('tr')[1:]:
            # Extract data from specific columns (td elements)
            columns = tr.find_all('td')
            if len(columns) < 7:
                # Rows with fewer than 7 cells are skipped
                continue

            account_id = columns[0].text.strip()
            environment = columns[1].text.strip()
            description = columns[2].text.strip()

            # Look the anchor up once; fall back to an empty string when the
            # cell has no <a> tag or the tag carries no href attribute.
            # A separate name is used so the page `url` above is not clobbered.
            anchor = columns[6].find('a')
            link = anchor.get('href', '') if anchor is not None else ''

            # Write the extracted data to the CSV file
            csvwriter.writerow([account_id, environment, description, link])

    print("CSV file has been created successfully.")
else:
    print(f"Failed to retrieve the page. Status code: {response.status_code}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment