Last active
June 18, 2022 16:14
-
-
Save joedf/35faf5d1142f402a504be8748d20257e to your computer and use it in GitHub Desktop.
Parse html pages from the Tap Dig MyMuseum fandom wiki...
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# by joedf - MIT license - May 2022
from bs4 import BeautifulSoup | |
import glob, csv | |
# Input: HTML pages saved into the current directory, taken from either of these pages:
# https://tap-dig-my-museum-wiki.fandom.com/wiki/Fossils
# https://tap-dig-my-museum-wiki.fandom.com/wiki/List_of_fossils
def file2Soup(path, enc="utf8"):
    """Parse the HTML file at ``path`` into a BeautifulSoup tree.

    The file is opened as text using the encoding ``enc`` and handed to
    ``BeautifulSoup`` with the ``'html.parser'`` backend.
    """
    with open(path, encoding=enc) as handle:
        return BeautifulSoup(handle, 'html.parser')
def getDataPoint(soup, dsource):
    """Find the ``div.pi-item`` whose ``data-source`` attribute is ``dsource``.

    Returns whatever ``soup.find`` returns for that query.
    """
    wanted_attrs = {'class': 'pi-item', 'data-source': dsource}
    return soup.find('div', wanted_attrs)
def getValue(soup):
    """Return the text of the first ``div.pi-data-value`` found under ``soup``."""
    value_node = soup.find('div', {'class': 'pi-data-value'})
    return value_node.get_text()
def getLabel(soup):
    """Return the text of the first ``h3.pi-data-label`` found under ``soup``."""
    label_node = soup.find('h3', {'class': 'pi-data-label'})
    return label_node.get_text()
def getKeyVal(soup, dsource):
    """Look up one ``pi-item`` by its data source and return key, label and value.

    Args:
        soup: Parsed tree to search.
        dsource: ``data-source`` attribute value of the wanted item.

    Returns:
        A dict ``{'key': dsource, 'name': <label text>, 'value': <value text>}``.
    """
    # Search the tree that was passed in, not a module-level global, so the
    # result always belongs to the page the caller asked about.
    _item = getDataPoint(soup, dsource)
    _value = getValue(_item)
    _label = getLabel(_item)
    return {'key': dsource, 'name': _label, 'value': _value}
def getBoneData(soup):
    """Collect every ``div.pi-item`` with ``data-source="bones"`` into a dict.

    Each key is the item's label, lower-cased with spaces turned into
    underscores; each entry is ``{'name': label, 'value': value}``.
    For labels containing "sale", a ``|`` is inserted after the first ``)``
    of the value text and the text is split on ``|``, so that entry's value
    is a list of strings rather than a single string.
    """
    collected = {}
    bone_attrs = {'class': 'pi-item', 'data-source': 'bones'}
    for entry in soup.find_all('div', bone_attrs):
        label = getLabel(entry)
        value = getValue(entry)
        lowered = str(label).lower()
        if 'sale' in lowered:
            # Mark the end of the first parenthesised part, then cut there.
            marked = str(value).replace(')', ')|', 1)
            value = marked.split('|')
        collected[lowered.replace(' ', '_')] = {'name': label, 'value': value}
    return collected
def getDino(soup):
    """Build one fossil record (a flat dict) from a parsed page.

    Args:
        soup: Parsed tree of a single page.

    Returns:
        A dict with the keys ``name``, ``tier``, ``excavation_cost``,
        ``bone_fragments``, ``minimum_cost``, ``sale_value_100`` and
        ``sale_value_200``. The cost and sale fields are converted with
        ``intParse``; ``name``, ``tier`` and ``bone_fragments`` stay as text.

    Raises:
        KeyError: if the bones data lacks ``bone_fragments``,
            ``minimum_cost`` or ``sale_value``.
        IndexError: if the sale value did not split into two parts.
    """
    # Every lookup goes through the ``soup`` argument rather than a
    # module-level global, so the record matches the page that was passed in.
    bones = getBoneData(soup)
    return {
        'name': soup.find('h2', {'class': 'pi-title', 'data-source': 'title'}).get_text(),
        'tier': getKeyVal(soup, 'tier')['value'],
        'excavation_cost': intParse(getKeyVal(soup, 'excavation_cost_coins')['value']),
        'bone_fragments': bones['bone_fragments']['value'],
        'minimum_cost': intParse(bones['minimum_cost']['value']),
        # The sale value is a two-element list: first entry, then second entry.
        'sale_value_100': intParse(bones['sale_value']['value'][0]),
        'sale_value_200': intParse(bones['sale_value']['value'][1]),
    }
def intParse(txt):
    """Return the leading integer of ``txt``, ignoring thousands separators.

    Commas are removed and the first whitespace-delimited token is converted,
    so ``'93,600 (100%)'`` yields ``93600``. Splitting on any whitespace
    (instead of a single space character) means leading blanks, tabs and
    newlines around the number no longer break the conversion.

    Args:
        txt: Text that starts with an integer, optionally followed by more text.

    Returns:
        The parsed integer.

    Raises:
        ValueError: if ``txt`` contains no token or the first token is not
            an integer.
    """
    tokens = txt.replace(',', '').split()
    if not tokens:
        raise ValueError(f"no integer found in {txt!r}")
    return int(tokens[0])
# Parse every saved page in the current directory into one record per fossil.
dinos = []
# glob yields names in a filesystem-dependent order; sorting keeps the
# progress log and the CSV rows in the same order on every run.
for i, file in enumerate(sorted(glob.glob("*.html")), start=1):
    html = file2Soup(file)
    dino = getDino(html)
    dinos.append(dino)
    print(f"Dino parsed ({i}): {dino['name']}")

# Write all records to a single CSV file, one row per parsed page.
print('\nsaving as dinos_file.csv ...')
with open('dinos_file.csv', mode='w', newline='', encoding='utf-8') as csv_file:
    fieldnames = ['name', 'tier', 'excavation_cost', 'bone_fragments', 'minimum_cost', 'sale_value_100', 'sale_value_200']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    for dino in dinos:
        writer.writerow(dino)
print('Done.')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name | tier | excavation_cost | bone_fragments | minimum_cost | sale_value_100 | sale_value_200 | |
---|---|---|---|---|---|---|---|
Acrocanthosaurus | Gold | 7800 | 40 | 156000 | 93600 | 873600 | |
Allosaurus | Gold | 6000 | 38 | 114000 | 68400 | 662400 | |
Amargasaurus | Gold | 15000 | 44 | 330000 | 198000 | 1728000 | |
Ammonoidea1 | Green | 1000 | 12 | 6000 | 3600 | 89600 | |
Ammonoidea2 | Green | 2100 | 12 | 12600 | 7560 | 188160 | |
Ammonoidea3 | Green | 3200 | 12 | 19200 | 11520 | 286720 | |
Ankylosaurus | Bronze | 300 | 15 | 2400 | 1350 | 27600 | |
Archelon | Silver | 20000 | 38 | 380000 | 228000 | 2208000 | |
Arsinoitherium | Bronze | 17000 | 25 | 221000 | 127500 | 1700000 | |
Baryonyx | Silver | 19000 | 32 | 304000 | 182400 | 2006400 | |
Basilosaurus | Platinum | 40000 | 60 | 1200000 | 720000 | 5120000 | |
Brachiosaurus | Platinum | 30000 | 79 | 1200000 | 711000 | 4296000 | |
Camarasaurus | Gold | 10000 | 40 | 200000 | 120000 | 1120000 | |
Carcharodontosaurus | Gold | 47000 | 43 | 1034000 | 606300 | 5376800 | |
Carnotaurus | Bronze | 4500 | 22 | 49500 | 29700 | 439200 | |
Centrosaurus | Silver | 4200 | 30 | 63000 | 37800 | 436800 | |
Ceratosaurus | Bronze | 10900 | 22 | 119900 | 71940 | 1063840 | |
Chasmosaurus | Silver | 17000 | 31 | 272000 | 158100 | 1781600 | |
Corythosaurus | Silver | 8800 | 30 | 132000 | 79200 | 915200 | |
Cryolophosaurus | Silver | 43000 | 34 | 731000 | 438600 | 4609600 | |
Cymbospondylus | Silver | 12800 | 31 | 204800 | 119040 | 1341440 | |
Dakosaurus | Bronze | 12700 | 21 | 139700 | 80010 | 1229360 | |
Deinocheirus | Gold | 24000 | 43 | 528000 | 309600 | 2745600 | |
Deinonychus | Bronze | 11500 | 23 | 138000 | 79350 | 1131600 | |
Dilophosaurus | Silver | 5500 | 29 | 82500 | 47850 | 567600 | |
Dimetrodon | Bronze | 18000 | 27 | 252000 | 145800 | 1828800 | |
Dimorphodon | Bronze | 700 | 16 | 5600 | 3360 | 64960 | |
Diplodocus | Platinum | 60000 | 64 | 1920000 | 1152000 | 7872000 | |
Dracorex | Bronze | 16500 | 23 | 198000 | 113850 | 1623600 | |
Dunkleosteus | Bronze | 14600 | 23 | 175200 | 100740 | 1436640 | |
Edmontonia | Bronze | 4900 | 25 | 63700 | 36750 | 490000 | |
Edmontosaurus | Silver | 22000 | 35 | 396000 | 231000 | 2376000 | |
Elasmosaurus | Gold | 18000 | 44 | 396000 | 237600 | 2073600 | |
Elasmotherium | Silver | 32000 | 36 | 576000 | 345600 | 3481600 | |
Euoplocephalus | Bronze | 13700 | 26 | 178100 | 106860 | 1380960 | |
Futabasaurus | Silver | 29000 | 35 | 522000 | 304500 | 3132000 | |
Gallimimus | Bronze | 15500 | 24 | 186000 | 111600 | 1537600 | |
Gastonia | Bronze | 2100 | 30 | 31500 | 18900 | 218400 | |
Gastornis | Bronze | 15000 | 24 | 180000 | 108000 | 1488000 | |
Giganotosaurus | Gold | 32000 | 43 | 704000 | 412800 | 3660800 | |
Gigantoraptor | Silver | 34000 | 34 | 578000 | 346800 | 3644800 | |
Gryposaurus | Silver | 10800 | 30 | 162000 | 97200 | 1123200 | |
Hatzegopteryx | Gold | 35000 | 40 | 700000 | 420000 | 3920000 | |
Herrerasaurus | Bronze | 14200 | 23 | 170400 | 97980 | 1397280 | |
Hypsilophodon | Bronze | 4300 | 20 | 43000 | 25800 | 412800 | |
Ichthyosaurus | Bronze | 5600 | 23 | 67200 | 38640 | 551040 | |
Iguanodon | Silver | 3000 | 30 | 45000 | 27000 | 312000 | |
Kentrosaurus | Bronze | 10600 | 25 | 137800 | 79500 | 1060000 | |
Kronosaurus | Gold | 14000 | 40 | 280000 | 168000 | 1568000 | |
Lambeosaurus | Silver | 18000 | 31 | 288000 | 167400 | 1886400 | |
Liopleurodon | Bronze | 10100 | 22 | 111100 | 66660 | 985760 | |
Maiasaura | Bronze | 2700 | 25 | 35100 | 20250 | 270000 | |
Mammuthus | Silver | 36000 | 35 | 648000 | 378000 | 3888000 | |
Mapusaurus | Gold | 39000 | 44 | 858000 | 514800 | 4492800 | |
Mastodonsaurus | Bronze | 16000 | 24 | 192000 | 115200 | 1587200 | |
Megalodon | Platinum | 50000 | 62 | 1550000 | 930000 | 6480000 | |
Mosasaurus | Platinum | 80000 | 77 | 3120000 | 1848000 | 11328000 | |
Nigersaurus | Silver | 38000 | 36 | 684000 | 410400 | 4134400 | |
Nothosaurus | Bronze | 6200 | 25 | 80600 | 46500 | 620000 | |
Nyctosaurus | Bronze | 3500 | 21 | 38500 | 22050 | 338800 | |
Ouranosaurus | Silver | 40000 | 36 | 720000 | 432000 | 4352000 | |
Oviraptor | Bronze | 6800 | 22 | 74800 | 44880 | 663680 | |
Pachycephalosaurus | Bronze | 200 | 15 | 1600 | 900 | 18400 | |
Paraceratherium | Silver | 46000 | 34 | 782000 | 469200 | 4931200 | |
Parasaurolophus | Silver | 6700 | 30 | 100500 | 60300 | 696800 | |
Plateosaurus | Silver | 16000 | 32 | 256000 | 153600 | 1689600 | |
Plesiosaurus | Bronze | 8000 | 22 | 88000 | 52800 | 780800 | |
Pliosaurus | Gold | 44000 | 43 | 968000 | 567600 | 5033600 | |
Plotosaurus | Silver | 12000 | 34 | 204000 | 122400 | 1286400 | |
Protoceratops | Bronze | 12000 | 21 | 132000 | 75600 | 1161600 | |
Psittacosaurus | Bronze | 1500 | 19 | 15000 | 8550 | 142800 | |
Pteranodon | Bronze | 500 | 11 | 3000 | 1650 | 44400 | |
Pterodaustro | Bronze | 5000 | 34 | 85000 | 51000 | 536000 | |
Quetzalcoatlus | Gold | 5000 | 33 | 85000 | 49500 | 532000 | |
Rhamphorhynchus | Bronze | 9400 | 22 | 103400 | 62040 | 917440 | |
Sarcosuchus | Gold | 42000 | 42 | 882000 | 529200 | 4771200 | |
Sauropelta | Bronze | 1000 | 15 | 8000 | 4500 | 92000 | |
Shonisaurus | Gold | 26000 | 40 | 520000 | 312000 | 2912000 | |
Sinosauropteryx | Bronze | 8800 | 22 | 96800 | 58080 | 858880 | |
Smilodon | Bronze | 13400 | 21 | 147400 | 84420 | 1297120 | |
Spinophorosaurus | Gold | 29000 | 46 | 667000 | 400200 | 3387200 | |
Spinosaurus | Gold | 7000 | 41 | 147000 | 86100 | 789600 | |
Stegosaurus | Silver | 1200 | 21 | 13200 | 7560 | 116160 | |
Styracosaurus | Silver | 8400 | 32 | 134400 | 80640 | 887040 | |
Temnodontosaurus | Silver | 27000 | 33 | 459000 | 267300 | 2872800 | |
Thalassodromeus | Silver | 9600 | 28 | 134400 | 80640 | 983040 | |
Therizinosaurus | Silver | 24000 | 29 | 360000 | 208800 | 2476800 | |
Torosaurus | Silver | 31000 | 32 | 496000 | 297600 | 3273600 | |
Triceratops | Silver | 2000 | 20 | 20000 | 12000 | 192000 | |
Trilobita | Green | 4400 | 12 | 26400 | 15840 | 394240 | |
Tsintaosaurus | Silver | 13600 | 30 | 204000 | 122400 | 1414400 | |
Tuojiangosaurus | Silver | 7900 | 32 | 126400 | 75840 | 834240 | |
Tylosaurus | Gold | 20000 | 40 | 400000 | 240000 | 2240000 | |
Tyrannosaurus rex | Gold | 12000 | 44 | 264000 | 158400 | 1382400 | |
Velociraptor | Bronze | 100 | 10 | 500 | 300 | 8800 | |
Yutyrannus | Silver | 14500 | 31 | 232000 | 134850 | 1519600 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
All dinos sell for ~0.6x what they cost at 100%; at 200% they sell for ~1.6x.