Skip to content

Instantly share code, notes, and snippets.

@NWPlayer123
Created March 31, 2020 04:35
Show Gist options
  • Save NWPlayer123/b066e3e5b9bcc399c7c42d051a4a1506 to your computer and use it in GitHub Desktop.
Save NWPlayer123/b066e3e5b9bcc399c7c42d051a4a1506 to your computer and use it in GitHub Desktop.
Animal Crossing New Horizons data scraping (to show fixer algo)
from bcsv_reader import BCSV
from msbt_reader import MSBT
from binascii import hexlify
from os import listdir
import sys, string, codecs
'''reload(sys)
sys.setdefaultencoding('utf8')'''
# Root of the extracted message archive; the .msbt string tables are read
# from the "String_USen/..." directories underneath it.
msg_path = "../message1.1"
# Directory holding the .bcsv tables (ItemClothGroup.bcsv, ItemParam.bcsv).
bcsv_path = "."
# File the final item table is written to.
output_name = "scrape_data_test.txt"
def getindices(s):
    """Return the positions of all uppercase characters in *s*.

    Accepts text or byte strings and returns a list of integer offsets in
    ascending order (empty when *s* has no uppercase characters).
    """
    # Test one-element slices instead of iterating: iterating ``bytes`` on
    # Python 3 yields ints, which have no ``isupper``, whereas a slice keeps
    # the type of *s* and behaves the same for text and bytes.
    return [i for i in range(len(s)) if s[i:i + 1].isupper()]
# Collect (item id, name) pairs from every item string table.
item_strings = []
item_id2str = {}
item_str2id = {}
for filename in listdir(msg_path + "/String_USen/Item/"):
    if not filename.endswith(".msbt"):
        continue
    labels, text = MSBT().read_msbt("%s/String_USen/Item/%s" % (msg_path, filename))
    # Labels arrive in groups; flatten them into one list ordered by text index.
    labels2 = sorted(
        (label for group in labels for label in group),
        key=lambda label: label[1],
    )
    for entry in labels2:
        if entry[0].endswith(b"_pl"):
            continue  # plural form, not a separate item
        itemid = int(entry[0].split(b"_")[1])  # Rug_07322 -> 7322
        itemname = text[entry[1]][0]
        item_strings.append([itemid, itemname])
        item_id2str[itemid] = itemname
        item_str2id[itemname] = itemid
# Collect (clothing group id, name) pairs from the outfit group-name tables.
cloth_strings = []
cloth_id2str = {}
cloth_str2id = {}
for filename in listdir(msg_path + "/String_USen/Outfit/GroupName/"):
    if not filename.endswith(".msbt"):
        continue
    labels, text = MSBT().read_msbt("%s/String_USen/Outfit/GroupName/%s" % (msg_path, filename))
    # Flatten the grouped labels, then order them by text index.
    labels2 = [label for group in labels for label in group]
    labels2.sort(key=lambda label: label[1])
    for entry in labels2:
        itemid = int(entry[0])  # the label itself is the numeric id here
        itemname = text[entry[1]][0]
        cloth_strings.append([itemid, itemname])
        cloth_id2str[itemid] = itemname
        cloth_str2id[itemname] = itemid
# Read ItemClothGroup to translate clothing string ids into item ids.
cloth_data = BCSV().read_bcsv(bcsv_path + "/ItemClothGroup.bcsv")
cloth_lookup = {}   # clothing string id -> item id
cloth_lookup2 = {}  # NUL-stripped name field -> item id
cloth_data2 = []    # [clothid, itemid, itemname, itemdesc] per row
for row in cloth_data:
    clothid = row[0x54706054]  # string id
    itemid = row[0x65503F9F]  # item id
    # Text fields carry trailing NUL padding; strip it and keep UTF-8 bytes.
    # TODO: assert at end?
    name_text = row[0x13AB5198].decode("UTF-8")
    itemname = name_text.rstrip(u"\0").encode("UTF-8")
    desc_text = row[0x036E8EBE].decode("UTF-8")
    itemdesc = desc_text.rstrip(u"\0").encode("UTF-8")
    cloth_lookup[clothid] = itemid
    cloth_lookup2[itemname] = itemid
    cloth_data2.append([clothid, itemid, itemname, itemdesc])
# Add clothes to item_strings for the next step; a clothing row is skipped
# when its string id has no name in cloth_id2str.
for entry in cloth_data2:
    # Store only the string names that match; only the lookup is guarded so
    # that unrelated errors are not silently swallowed.
    try:
        cloth_name = cloth_id2str[entry[0]]
    except KeyError:
        continue
    item_strings.append([entry[1], cloth_name])
# Build the id -> name lookup table from everything gathered so far; when an
# id occurs more than once the last entry wins.
item_lookup = {pair[0]: pair[1] for pair in item_strings}
# Parse ItemParam for ids, prices and file names, attach a name to every row
# and write the combined table to output_name.
#
# Pass 1 names each row straight from item_lookup and records file-name
# prefixes of the named rows in dupe_lookup.  Pass 2 names the remaining rows
# from those prefixes, falling back to b"UNUSED".
#
# NOTE(review): this section relies on Python 2 string semantics (e.g.
# ``.encode`` is called on the b"UNUSED" literal and a formatted str is
# written to a file opened in "wb" mode) -- confirm the target interpreter
# before porting.
item_data = BCSV().read_bcsv(bcsv_path + "/ItemParam.bcsv")
item_table = []   # [item_id, buy, sell, item_name, item_type, file_name, file_desc]
do_later = []     # rows whose id has no entry in item_lookup (no item_name yet)
dupe_lookup = {}  # file-name prefix -> item name, filled from the named rows
skipme = False    # not read anywhere in this section; kept so module globals are unchanged
with open(output_name, "wb") as o:
    o.write(b"Item ID (hex), Buy Price, Sell Price, Item Name, item_type, file_name, file_desc\r\n")
    for entry in item_data:
        item_id = entry[0x54706054]
        buy_price = entry[0x718B024D]
        sell_price = int(buy_price / 4.0)  # a quarter of the buy price, truncated
        file_name = entry[0x3FEBC642].decode("UTF-8").rstrip(u"\0").encode("UTF-8")
        item_type = entry[0xFC275E86].decode("UTF-8").rstrip(u"\0").encode("UTF-8")
        try:
            file_desc = entry[0xB8CC232C].decode("UTF-8").rstrip(u"\0").encode("UTF-8")
        except UnicodeDecodeError:
            # bcsv limit: the string was truncated at 0x40, leaving bytes that
            # do not decode; keep what does and flag the row.
            file_desc = b"FILE_DESC_TRUNCATED " + entry[0xB8CC232C].decode("UTF-8", "ignore").encode("UTF-8")

        # Check if we have the name for that id, else defer the row.  Only the
        # lookup is guarded, so a later failure cannot record the row twice.
        try:
            item_name = item_lookup[item_id]
        except KeyError:
            do_later.append([item_id, buy_price, sell_price, item_type, file_name, file_desc])
            continue

        item_table.append([item_id, buy_price, sell_price, item_name, item_type, file_name, file_desc])
        # Store the base name for numbered variants, e.g. 0, 1, 2 of a thing.
        dupe_lookup[file_name.rstrip(string.digits)] = item_name
        # Strip the last capitalised word so e.g. TwotoneMonotone and its
        # colour variants share a prefix; very short prefixes are ignored.
        indices = getindices(file_name)
        if len(indices) > 1:
            prefix = file_name[:indices[-1]]
            if len(prefix) > 5:
                dupe_lookup[prefix] = item_name

    for entry in do_later:
        item_id, buy_price, sell_price, item_type, file_name, file_desc = entry
        try:
            item_name = dupe_lookup[file_name.rstrip(string.digits)]
        except KeyError:
            try:
                indices = getindices(file_name)
                item_name = dupe_lookup[file_name[:indices[-1]]]
            except (KeyError, IndexError):
                # No uppercase letter to split on, or no matching prefix.
                item_name = b"UNUSED"
        # NOTE(review): the print is assumed to run for every deferred row;
        # the original indentation was not available -- confirm.
        print("%s %s" % (file_name, item_name.encode("UTF-8")))
        item_table.append([item_id, buy_price, sell_price, item_name, item_type, file_name, file_desc])

    item_table.sort(key=lambda x: x[0])  # sort by item_id
    for entry in item_table:
        o.write("%04X, %d, %d, %s, %s, %s, %s\r\n" % (entry[0], entry[1], entry[2], entry[3].encode("UTF-8"), entry[4], entry[5], entry[6]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment