Skip to content

Instantly share code, notes, and snippets.

@skwerlman
Created September 11, 2017 23:32
Show Gist options
  • Save skwerlman/882c2192fa55ab19fddb1814ba8dd7f2 to your computer and use it in GitHub Desktop.
Save skwerlman/882c2192fa55ab19fddb1814ba8dd7f2 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import csv
import json
import re
import requests
from bs4 import BeautifulSoup
from zenlog import log
# Lower-cased ship-matrix names mapped to the key under which the same ship
# is looked up in the scraped price data (see get_matrix_data).
NAME_CORRECTION_DICT = {
    'm50 interceptor': 'm50',
    'vanduul scythe': 'scythe',
    'javelin-class destroyer': 'javelin',
    'reliant kore - mini hauler': 'reliant kore',
    'reliant mako - news van': 'reliant mako',
    'reliant sen - researcher': 'reliant sen',
    'reliant tana - skirmisher': 'reliant tana',
    'esperia glaive': 'glaive',
    'esperia vanduul blade': 'blade',
    'mpuv personnel': 'mpuv-1p',
    'mpuv cargo': 'mpuv-1c',
    'esperia prowler': 'prowler',
    'razor': 'misc razor',
    'origin 600i touring': '600i touring',
    'origin 600i explorer': '600i explorer',
}

# Page downloaded by main() and handed to get_matrix_data.
SHIP_MATRIX_URL = "https://robertsspaceindustries.com/ship-specs"

# Page downloaded by main() and handed to get_price_data.
SHIP_PRICE_LIST_URL = "http://starcitizen.wikia.com/wiki/List_of_ship_and_vehicle_prices"

# Path of the tab-separated file written by main().
OUT_FILE = "ship-data.tsv"
def get_price_data(soup):
    """Collect ship prices from the parsed price-list page.

    Every ``table.article-table`` found in *soup* is scanned row by row. The
    text of a row's first cell is used as the ship name (stripped and
    lower-cased) and the text of its third cell as the price.

    Args:
        soup: Parsed document exposing ``select()``; the selected cells must
            expose ``get_text()``. ``main`` passes a BeautifulSoup tree.

    Returns:
        dict mapping each lower-cased ship name to its price as an ``int``.
        A price cell of ``--`` is stored as ``0``. Rows whose price cannot
        be parsed are logged and left out of the result.
    """
    data = {}
    for table in soup.select('table.article-table'):
        for row in table.select('tr'):
            cols = row.select('td')
            # Header rows have no <td> cells, and a row with fewer than three
            # cells has no price column to read.
            if len(cols) < 3:
                continue
            name = cols[0].get_text().strip().lower()
            raw_price = cols[2].get_text().strip()
            # Drop the currency sign and thousands separators; a '--'
            # placeholder becomes 0.
            price = raw_price.replace('$', '').replace(',', '').replace('--', '0')
            try:
                data[name] = int(price)
            except ValueError:
                # One malformed cell should not abort the whole scrape.
                log.warn(f'Could not parse price {raw_price!r} for {name}')
    return data
def _split_focus(focus):
    """Split a '/'-separated focus string into a ``(focus1, focus2)`` pair."""
    parts = focus.split('/') if focus else ['']
    first = parts[0].strip()
    # Only the first two parts are kept; a missing second part is ''.
    second = parts[1].strip() if len(parts) > 1 else ''
    return first, second


def get_matrix_data(soup, price_data):
    """Build one record per ship from the JSON embedded in the ship matrix page.

    The page text is searched for a ``data: [{...}]`` JSON array. Each object
    in that array becomes a dict with the keys ``classification``, ``focus1``,
    ``focus2``, ``manufacturer``, ``name`` and ``price``, in that order.

    Args:
        soup: Parsed ship matrix page; only ``str(soup)`` is used.
        price_data: dict mapping lower-cased ship names to prices, as
            returned by ``get_price_data``.

    Returns:
        list of dicts, one per ship. Ships with no matching price are logged
        and given a price of ``0``.

    Raises:
        ValueError: if the embedded JSON array cannot be found in the page.
    """
    match = re.search(r'data: (\[\{.*\}\])', str(soup))
    if match is None:
        raise ValueError('Ship matrix data not found in page')
    matrix = json.loads(match.group(1))

    data = []
    for ship in matrix:
        focus1, focus2 = _split_focus(ship['focus'])
        name = ship['name']
        key = name.lower()
        # Prefer the corrected name, but fall back to the raw name so a
        # correction that does not appear in the price data cannot crash.
        corrected = NAME_CORRECTION_DICT.get(key, key)
        if corrected in price_data:
            price = price_data[corrected]
        elif key in price_data:
            price = price_data[key]
        else:
            log.warn(f'No price info for {name}')
            price = 0
        # Key order here fixes the column order of the written TSV.
        data.append({
            'classification': ship['classification'],
            'focus1': focus1,
            'focus2': focus2,
            'manufacturer': ship['manufacturer']['code'],
            'name': name,
            'price': price,
        })
    return data
def _fetch_soup(url, description, timeout=30):
    """Download *url* and return it parsed; log and exit with status 1 on failure."""
    try:
        # Without a timeout a stalled server would hang the script forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        log.critical(f'{description} failed to download: {exc}')
        raise SystemExit(1) from exc
    if response.status_code != 200:
        log.critical(f'{description} failed to download with code {response.status_code}')
        raise SystemExit(1)
    return BeautifulSoup(response.text, 'html.parser')


def main():
    """Download both pages, merge ship data with prices and write ``OUT_FILE``.

    The price list is fetched and parsed first, then the ship matrix. The
    merged records are written as tab-separated values with a header row.
    Exits with status 1 if either download fails or no ships were found.
    """
    price_data = get_price_data(_fetch_soup(SHIP_PRICE_LIST_URL, 'Ship prices'))
    data = get_matrix_data(_fetch_soup(SHIP_MATRIX_URL, 'Ship matrix'), price_data)
    if not data:
        # There is no first record to take the column names from.
        log.critical('Ship matrix contained no ships')
        raise SystemExit(1)

    # newline='' lets the csv writer control line endings itself; an explicit
    # encoding keeps the output independent of the platform default.
    with open(OUT_FILE, 'w', newline='', encoding='utf-8') as output_file:
        dict_writer = csv.DictWriter(
            output_file,
            fieldnames=list(data[0]),
            delimiter='\t',
            lineterminator='\n',
        )
        dict_writer.writeheader()
        dict_writer.writerows(data)
if __name__ == '__main__':
    # Run the scraper only when executed as a script. Exceptions are not
    # caught here, so a failure surfaces with its full traceback.
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment