Skip to content

Instantly share code, notes, and snippets.

@therealkenc
Last active May 25, 2024 21:38
Show Gist options
  • Save therealkenc/46afe6f09da9edde4b55c8ccbf550bf0 to your computer and use it in GitHub Desktop.
Save therealkenc/46afe6f09da9edde4b55c8ccbf550bf0 to your computer and use it in GitHub Desktop.
import sys
import http.client
import gzip
import io
import pandas as pd
from bs4 import BeautifulSoup
# Fetch the Forex Factory calendar page and print its events as a pandas
# DataFrame with the columns 'date', 'time', 'currency' and 'event'.
#
# Usage: an optional first command-line argument is passed through verbatim as
# the `range` query parameter of the calendar URL; without it the default
# calendar page is requested.

# Define the host, request path and headers
url = 'www.forexfactory.com'
if len(sys.argv) > 1:
    date_range = sys.argv[1]
    path = f'/calendar?range={date_range}'
else:
    path = '/calendar'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    # Advertise only the encoding that is actually decoded below. Offering
    # 'deflate' or 'br' would let the server answer with a body this script
    # cannot decompress.
    'Accept-Encoding': 'gzip',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'Cache-Control': 'max-age=0',
}

# Create a connection; the timeout keeps a stalled server from hanging the
# script forever.
conn = http.client.HTTPSConnection(url, timeout=30)
try:
    # Make the request and read the whole response body
    conn.request("GET", path, headers=headers)
    response = conn.getresponse()
    status_code = response.status
    # Normalise the header so 'GZIP' or ' gzip' are recognised as well.
    content_encoding = (response.getheader('Content-Encoding') or '').strip().lower()
    content = response.read()
finally:
    # Always release the socket, even if the request or read raised.
    conn.close()

# Check if the request was successful
if status_code == 200:
    # Handle compression if necessary
    if content_encoding == 'gzip':
        content = gzip.decompress(content)

    # Decode the content; a stray invalid byte should not abort the scrape,
    # so undecodable bytes are replaced instead of raising UnicodeDecodeError.
    content = content.decode('utf-8', errors='replace')

    # Parse the HTML content
    soup = BeautifulSoup(content, 'html.parser')

    # Find the calendar table
    table = soup.find('table', {'class': 'calendar__table'})
    if table is not None:
        # Find all rows within the table
        rows = table.find_all('tr', {'class': 'calendar__row'})

        # Extract details from each row
        calendar_data = []
        for row in rows:
            time_element = row.find('td', {'class': 'calendar__time'})
            currency_element = row.find('td', {'class': 'calendar__currency'})
            event_element = row.find('td', {'class': 'calendar__event'})

            # Rows lacking any of the three cells are skipped.
            if time_element is None or currency_element is None or event_element is None:
                continue

            # The date is taken from the closest preceding day-breaker row;
            # 'N/A' is used when no such row precedes this one.
            date_element = row.find_previous('tr', {'class': 'calendar__row--day-breaker'})
            date = date_element.text.strip() if date_element is not None else 'N/A'
            time = time_element.text.strip()
            currency = currency_element.text.strip()
            event = event_element.text.strip()

            calendar_data.append({
                'date': date,
                'time': time,
                'currency': currency,
                'event': event,
            })

        # Create a pandas DataFrame
        df = pd.DataFrame(calendar_data)

        # Print the DataFrame
        print(df)
    else:
        print("Calendar table not found.")
else:
    print(f"Failed to retrieve the webpage. Status code: {status_code}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment