Created
December 26, 2022 13:27
-
-
Save AcrylicShrimp/4c94db38b7d2c4dd2e832a7d53654e42 to your computer and use it in GitHub Desktop.
Korea Steam Deck Parcel Crawler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio | |
import aiohttp | |
import re | |
import time | |
from urllib import parse | |
# Step between consecutive waybill numbers probed within one page.
MUL: int = 1
# Number of waybill numbers queried concurrently per page.
PAGE: int = 1_000
# First waybill number to probe; main() advances it by PAGE * MUL per page.
# Progress notes (12/26):
#   6-series: confirmed nothing exists before 568586360001 - start from there.
#   7-series: confirmed nothing exists before 568587435453 - start from there.
BASE: int = 568_587_337_453
# Endpoint queried by lookup() with callapi=parceltracking.
URL: str = "https://m.search.naver.com/p/csearch/ocontent/util/headerjson.nhn"
# passportKey sent with every lookup; filled in by update_token().
TOKEN: str = ''
async def update_token():
    """Refresh the module-level ``TOKEN`` from a Naver search result page.

    Downloads the search page for a fixed waybill query, extracts the
    ``"passportKey":"..."`` value embedded in the HTML, URL-decodes it and
    stores it in ``TOKEN``. When no key is found the previous ``TOKEN`` is
    kept and a notice is printed. The current token is always printed.
    """
    global TOKEN
    async with aiohttp.ClientSession() as session:
        async with session.get(url='https://search.naver.com/search.naver?where=nexearch&sm=top_hty&fbm=1&ie=utf8&query=568587337453') as res:
            html = await res.text()
    # Stop at the first closing quote: a greedy `\S+` would run on to the
    # last quote of the surrounding whitespace-free markup and capture
    # unrelated content along with the key.
    match = re.search(r'"passportKey":"([^"]+)"', html)
    if match is None:
        print('passportKey not found in the search page; keeping the previous token.')
    else:
        TOKEN = parse.unquote(match.group(1))
    print(f'token={TOKEN}')
async def lookup(dst_company_code: str, dst_waybill_number: str) -> dict:
    """Query the parcel-tracking endpoint for one waybill and return its JSON.

    Args:
        dst_company_code: Delivery company code, sent as ``t_code``.
        dst_waybill_number: Waybill number, sent as ``t_invoice`` and embedded
            in the Referer header. It is coerced with ``str()`` so integer
            waybill numbers are accepted as well.

    Returns:
        The decoded JSON body of the response.
    """
    waybill = str(dst_waybill_number)
    headers = {
        "Host": "m.search.naver.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0",
        "Accept": "*/*",
        "Accept-Language": "ko-KR,ko;q=0.8,en-US;q=0.5,en;q=0.3",
        # "br" is deliberately not advertised: aiohttp can only decode Brotli
        # responses when the optional Brotli package is installed, and it is
        # not among this project's pinned requirements.
        "Accept-Encoding": "gzip, deflate",
        "Referer": f"https://search.naver.com/search.naver?sm=top_sly.hst&fbm=0&ie=utf8&query={waybill}",
        "DNT": "1",
        "Connection": "keep-alive",
        "TE": "Trailers",
    }
    params = {
        "callapi": "parceltracking",
        "t_code": dst_company_code,
        "t_invoice": waybill,
        # Read at call time, so a token refreshed by update_token() is used.
        "passportKey": TOKEN,
    }
    async with aiohttp.ClientSession() as session:
        async with session.get(url=URL, headers=headers, params=params) as res:
            return await res.json()
class LookupResult:
    """The fields of one parcel-tracking response that the crawler uses.

    Attributes are read from the response dict:
      invoice: value of ``invoiceNo`` ('' when missing or null).
      item:    value of ``itemName`` ('' when missing or null).
      done:    True only when ``completeYN`` equals ``'Y'``.
    """

    def __init__(self, res: dict) -> None:
        # A key that is present but null yields None from dict.get() even
        # with a default; normalise it so that substring checks such as
        # `'Steam Deck' in result.item` cannot raise TypeError.
        invoice = res.get('invoiceNo')
        item = res.get('itemName')
        self.invoice = '' if invoice is None else invoice
        self.item = '' if item is None else item
        self.done = res.get('completeYN') == 'Y'

    def __repr__(self) -> str:
        return (
            f'{type(self).__name__}(invoice={self.invoice!r}, '
            f'item={self.item!r}, done={self.done!r})'
        )
async def query_range(dst_company_code: str, base: int, count: int, multiplier: int):
    """Look up a run of waybill numbers concurrently and keep Steam Deck parcels.

    Args:
        dst_company_code: Delivery company code forwarded to ``lookup``.
        base: First waybill number of the run.
        count: How many numbers to probe. A negative value walks downwards
            from ``base`` (offsets 0, -1, ..., count + 1).
        multiplier: Distance between consecutive probed numbers.

    Returns:
        An iterator over the ``LookupResult`` objects whose item name
        contains ``'Steam Deck'``.
    """
    offsets = range(count) if count >= 0 else range(0, count, -1)
    # lookup() declares the waybill number as str, so convert explicitly
    # instead of handing it a bare int.
    responses = await asyncio.gather(*(
        lookup(dst_company_code, str(base + offset * multiplier))
        for offset in offsets
    ))
    # Responses carrying a 'message' key are discarded (presumably error
    # replies - confirm against the API).
    parcels = [
        LookupResult(response)
        for response in responses
        if 'message' not in response
    ]
    if not parcels:
        print('please update the token.')
    return filter(lambda parcel: 'Steam Deck' in parcel.item, parcels)
async def main():
    """Crawl waybill ranges forever, logging Steam Deck parcels to a CSV file.

    Refreshes the token once, then repeatedly queries ``PAGE`` waybill numbers
    starting at ``BASE`` (stepping by ``MUL``) for company code '04'. Matching
    parcels are appended to ``result-<BASE>.csv`` (prefixed with ``[x<MUL>]``
    when ``MUL`` is not 1) and per-model delivery statistics are printed.
    ``BASE`` is advanced after every page. The loop only ends when the task
    is cancelled or interrupted.
    """
    global BASE
    await update_token()
    sizes = ('64', '256', '512')
    with open(f'result-{BASE}.csv' if MUL == 1 else f'[x{MUL}] result-{BASE}.csv', 'w') as out:
        out.write('invoice,item,done\n')
        while True:
            results = list(await query_range('04', BASE, PAGE, MUL))
            print(f'range=[{BASE}..{BASE + PAGE * MUL})')
            if results:
                totals = dict.fromkeys(sizes, 0)
                received = dict.fromkeys(sizes, 0)
                for result in results:
                    # Independent substring checks: an item name is counted
                    # under every size label it contains.
                    for size in sizes:
                        if size in result.item:
                            totals[size] += 1
                            received[size] += 1 if result.done else 0
                for result in results:
                    out.write(
                        f'{result.invoice},{result.item},{result.done}\n')
                # Flush per page so an interrupted run keeps what it found.
                out.flush()
                for size in sizes:
                    print(f'stats of {size}={received[size]}/{totals[size]}')
            BASE += PAGE * MUL
            # Pause between pages without blocking the event loop, which
            # time.sleep() would do inside a coroutine.
            await asyncio.sleep(5)
if __name__ == '__main__':
    # asyncio.run() creates, uses and closes its own event loop, so no loop
    # is created by hand here; a manually created one would never be used
    # or closed.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl+C is the expected way to stop the endless crawl.
        pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
aiohttp==3.8.3
aiosignal==1.3.1
async-timeout==4.0.2
asyncio==3.4.3
attrs==22.1.0
autopep8==2.0.1
certifi==2022.12.7
charset-normalizer==2.1.1
frozenlist==1.3.3
idna==3.4
multidict==6.0.3
pycodestyle==2.10.0
soupsieve==2.3.2.post1
tomli==2.0.1
urllib3==1.26.13
yarl==1.8.2
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment