Last active
August 24, 2020 06:09
-
-
Save nullableVoidPtr/fc19a11da962fde98895ac4582bcc715 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import asyncio | |
import aiohttp | |
import aiofiles | |
from pathlib import Path | |
import datetime | |
import os | |
import time | |
from sys import argv | |
from bs4 import BeautifulSoup, SoupStrainer | |
# Root of the AtCoder site; scraped hrefs are relative and get prefixed with it.
BASE_URL = "https://atcoder.jp"

# Maps a language name (the text before any "(" in the language cell of a
# submission page) to the file extension used when saving the source.
# Each key appears exactly once: the literal previously listed "Java" twice,
# which is harmless at runtime but is flagged by linters and hides typos.
extensions = {
    "C": ".c",
    "C++": ".cpp",
    "C++14": ".cpp",
    "Java": ".java",
    "Python": ".py",
    "Python3": ".py",
    "Bash": ".sh",
    "bc": ".bc",
    "Awk": ".awk",
    "C#": ".cs",
    "Clojure": ".clj",
    "Crystal": ".cr",
    "D": ".d",
    "Dart": ".dart",
    "dc": ".dc",
    "Erlang": ".erl",
    "Elixir": ".ex",
    "F#": ".fs",
    "Forth": ".fs",
    "Fortran": ".f08",
    "Go": ".go",
    "Haskell": ".hs",
    "Haxe": ".hx",
    "JavaScript": ".js",
    "Julia": ".jl",
    "Kotlin": ".kt",
    "Lua": ".lua",
    "Dash": ".sh",
    "Nim": ".nim",
    "Objective-C": ".m",
    "Common Lisp": ".lisp",
    "OCaml": ".ml",
    "Octave": ".m",
    "Pascal": ".pas",
    "Perl": ".pl",
    "Raku": ".p6",
    "PHP": ".php",
    "Prolog": ".pl",
    "PyPy2": ".py",
    "PyPy3": ".py",
    "Racket": ".rkt",
    "Ruby": ".rb",
    "Rust": ".rs",
    "Scala": ".scala",
    "Scheme": ".scm",
    "Standard ML": ".sml",
    "Swift": ".swift",
    "Text": ".txt",
    "TypeScript": ".ts",
    "Visual Basic": ".vb",
    "Zsh": ".sh",
    "COBOL - Fixed": ".cob",
    "COBOL - Free": ".cob",
    "Brainfuck": ".bf",
    "Ada2012": ".adb",
    "Unlambda": ".unl",
    "Cython": ".pyx",
    "Sed": ".sed",
    "Vim": ".vim",
}
async def fetch(session, url):
    """Issue a GET request for ``url`` on ``session`` and return the body text.

    The response is used as an async context manager, so it is released as
    soon as the body has been read.
    """
    async with session.get(url) as resp:
        body = await resp.text()
    return body
async def getContests(session):
    """Yield the absolute URL of every contest found in the archive listing.

    Page 1 is parsed in full because the pagination widget is also read from
    it; pages 2..N are parsed with a ``SoupStrainer`` limited to ``<td>``
    elements. Pages are fetched one after another.

    Args:
        session: HTTP session handed through to ``fetch``.

    Yields:
        str: ``BASE_URL`` concatenated with each contest link's ``href``.
    """
    archive_url = f"{BASE_URL}/contests/archive"
    soup = BeautifulSoup(
        await fetch(session, f"{archive_url}?page=1"),
        "html.parser",
    )
    for contest in soup.select("table > tbody > tr > td:nth-child(2) > a:nth-child(2)"):
        yield BASE_URL + contest["href"]

    # The last pagination link carries the highest page number. When the
    # widget is absent there is nothing beyond page 1, so stop instead of
    # failing with AttributeError on ``None.text``.
    last_page = soup.select_one("ul.pagination > li:last-child > a")
    if last_page is None:
        return

    for i in range(2, int(last_page.text) + 1):
        page = BeautifulSoup(
            await fetch(session, f"{archive_url}?page={i}"),
            "html.parser",
            parse_only=SoupStrainer("td"),
        )
        for contest in page.select("a:nth-child(2)"):
            yield BASE_URL + contest["href"]
async def getSubmissions(session, contest):
    """Yield the absolute URL of each submission by ``argv[1]`` in ``contest``.

    ``contest`` is a contest URL; the submissions listing filtered by
    ``f.User`` is requested page by page. Page 1 is parsed in full (the
    pagination widget is read from it); later pages are parsed with a
    ``SoupStrainer`` limited to ``<tr>`` elements.
    """
    listing = f"{contest}/submissions?f.User={argv[1]}"

    first_html = await fetch(session, f"{listing}&page=1")
    first_page = BeautifulSoup(first_html, "html.parser")
    for link in first_page.select("table > tbody > tr > td:last-child > a"):
        yield BASE_URL + link["href"]

    # No pagination link means page 1 was the only page.
    pager = first_page.select_one("ul.pagination > li:last-child > a")
    if not pager:
        return

    page_total = int(pager.text)
    for page_no in range(2, page_total + 1):
        html = await fetch(session, f"{listing}&page={page_no}")
        rows = BeautifulSoup(html, "html.parser", parse_only=SoupStrainer("tr"))
        for link in rows.select("td:last-child > a"):
            yield BASE_URL + link["href"]
async def processSubmission(session, submission):
    """Download one submission page and save its source code to disk.

    The code is written to
    ``./<argv[1]>/<contest>/<task>/<submission id> - <status><extension>``
    and the file's access/modification times are set to the submission time
    parsed from the page.

    Args:
        session: HTTP session handed through to ``fetch``.
        submission: Absolute URL of the submission detail page.
    """
    soup = BeautifulSoup(await fetch(session, submission), "html.parser")

    # The <title> text is split once on " - ": the left part ends in
    # " #<id>", the right part is used as the contest directory name.
    title, contest = soup.find("title").get_text(strip=True).split(" - ", 1)
    submission_id = title.split(" #", 1)[-1]

    # The first seven <td> cells of the first table are unpacked positionally;
    # the underscore-prefixed ones are not needed for saving the file.
    when, task, _user, lang, _score, _size, status = [
        td.get_text(strip=True) for td in soup.find("table").find_all("td")[:7]
    ]

    # ``%z`` yields a timezone-aware datetime, and ``timestamp()`` honours
    # that offset. ``time.mktime(dt.timetuple())`` discarded the offset and
    # reinterpreted the wall-clock fields in the machine's local zone, giving
    # wrong file times whenever the two zones differ.
    when = datetime.datetime.strptime(when, "%Y-%m-%d %H:%M:%S%z").timestamp()

    # Unknown languages fall back to " <language cell text>" as the suffix.
    extension = extensions.get(lang.split("(")[0].strip(), " " + lang)
    filename = Path(
        f"./{argv[1]}/{contest}/{task}/{submission_id} - {status}{extension}"
    )
    filename.parent.mkdir(parents=True, exist_ok=True)
    print(filename)

    # Explicit UTF-8: submitted code may contain non-ASCII text, which the
    # platform default encoding cannot always represent.
    async with aiofiles.open(filename, mode="w", encoding="utf-8") as f:
        await f.write(soup.find("pre", id="submission-code").get_text(strip=True))
    os.utime(filename, (when, when))
async def main():
    """Collect every submission of every contest, then download them together.

    The contest and submission listings are walked sequentially to build one
    ``processSubmission`` coroutine per submission; the coroutines are then
    awaited as a group with ``asyncio.gather`` on a single shared session.
    """
    async with aiohttp.ClientSession() as session:
        jobs = []
        async for contest in getContests(session):
            async for submission in getSubmissions(session, contest):
                jobs.append(processSubmission(session, submission))
        await asyncio.gather(*jobs)
if __name__ == "__main__":
    # The user name is read from argv[1] throughout the script; fail early
    # with a usage line instead of an IndexError in the middle of scraping.
    if len(argv) < 2:
        raise SystemExit(f"usage: {argv[0]} <atcoder-username>")
    # asyncio.run() creates a fresh event loop, runs main() to completion and
    # closes the loop; calling asyncio.get_event_loop() with no running loop
    # is deprecated.
    asyncio.run(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import re | |
from pathlib import Path | |
import datetime | |
import requests | |
import os | |
import time | |
from sys import argv | |
from bs4 import BeautifulSoup | |
# Root of the AtCoder site; scraped hrefs are relative and get prefixed with it.
BASE_URL = "https://atcoder.jp"

# Maps a language name (the text before any "(" in the language cell of a
# submission page) to the file extension used when saving the source.
# Each key appears exactly once: the literal previously listed "Java" twice,
# which is harmless at runtime but is flagged by linters and hides typos.
extensions = {
    "C": ".c",
    "C++": ".cpp",
    "C++14": ".cpp",
    "Java": ".java",
    "Python": ".py",
    "Python3": ".py",
    "Bash": ".sh",
    "bc": ".bc",
    "Awk": ".awk",
    "C#": ".cs",
    "Clojure": ".clj",
    "Crystal": ".cr",
    "D": ".d",
    "Dart": ".dart",
    "dc": ".dc",
    "Erlang": ".erl",
    "Elixir": ".ex",
    "F#": ".fs",
    "Forth": ".fs",
    "Fortran": ".f08",
    "Go": ".go",
    "Haskell": ".hs",
    "Haxe": ".hx",
    "JavaScript": ".js",
    "Julia": ".jl",
    "Kotlin": ".kt",
    "Lua": ".lua",
    "Dash": ".sh",
    "Nim": ".nim",
    "Objective-C": ".m",
    "Common Lisp": ".lisp",
    "OCaml": ".ml",
    "Octave": ".m",
    "Pascal": ".pas",
    "Perl": ".pl",
    "Raku": ".p6",
    "PHP": ".php",
    "Prolog": ".pl",
    "PyPy2": ".py",
    "PyPy3": ".py",
    "Racket": ".rkt",
    "Ruby": ".rb",
    "Rust": ".rs",
    "Scala": ".scala",
    "Scheme": ".scm",
    "Standard ML": ".sml",
    "Swift": ".swift",
    "Text": ".txt",
    "TypeScript": ".ts",
    "Visual Basic": ".vb",
    "Zsh": ".sh",
    "COBOL - Fixed": ".cob",
    "COBOL - Free": ".cob",
    "Brainfuck": ".bf",
    "Ada2012": ".adb",
    "Unlambda": ".unl",
    "Cython": ".pyx",
    "Sed": ".sed",
    "Vim": ".vim",
}
# Synchronous downloader: walks the contest history page of the user given in
# argv[1], follows each contest's per-user submissions listing page by page,
# and saves every submission's source code under
# ./<user>/<contest>/<task>/<submission id> - <status><extension>.

# One shared session for every request made below.
s = requests.session()

# Raw string so that "\?" and "\." reach the regex engine as written instead
# of being (invalid) Python string escapes.
submissions_href = re.compile(r"^/contests/.+/submissions\?f\.User=")

history = BeautifulSoup(
    s.get(f"{BASE_URL}/users/{argv[1]}/history").text, "html.parser"
)
for row in history.select("table#history > tbody > tr"):
    anchor = row.find("a", attrs={"href": submissions_href})
    if anchor is None:
        # Row without a per-user submissions link: skip it rather than
        # failing with TypeError when subscripting None.
        continue
    contest_link = BASE_URL + anchor["href"]

    # Page 1 is fetched once and used both for the page count and for its
    # submission links.
    # NOTE(review): assumes the pagination widget on "&page=1" matches the
    # one on the unparameterised listing that was queried before - confirm.
    first_page = BeautifulSoup(
        s.get(f"{contest_link}&page=1").text, "html.parser"
    )
    pager = first_page.select_one(
        "div.text-center:last-child > ul.pagination > li:last-child > a"
    )
    # No pagination widget is treated as a single page.
    last_page = int(pager.get_text(strip=True)) if pager is not None else 1

    for pageNo in range(1, last_page + 1):
        if pageNo == 1:
            page = first_page
        else:
            page = BeautifulSoup(
                s.get(f"{contest_link}&page={pageNo}").text, "html.parser"
            )

        for detail in page.select("table > tbody > tr > td:last-child > a"):
            submission_link = BASE_URL + detail["href"]
            soup = BeautifulSoup(s.get(submission_link).text, "html.parser")

            # The <title> text is split once on " - ": the left part ends in
            # " #<id>", the right part is used as the contest directory name.
            submission, contest = (
                soup.find("title").get_text(strip=True).split(" - ", 1)
            )
            submission = submission.split(" #", 1)[-1]

            # The first seven <td> cells of the first table, unpacked
            # positionally; the underscore-prefixed ones are not needed.
            when, task, _user, lang, _score, _size, status = [
                td.get_text(strip=True)
                for td in soup.find("table").find_all("td")[:7]
            ]

            # timestamp() honours the UTC offset parsed by %z;
            # time.mktime(dt.timetuple()) dropped it and reinterpreted the
            # wall-clock fields in the machine's local zone.
            when = datetime.datetime.strptime(
                when, "%Y-%m-%d %H:%M:%S%z"
            ).timestamp()

            # Unknown languages fall back to " <language cell text>".
            extension = extensions.get(lang.split("(")[0].strip(), " " + lang)
            filename = Path(
                f"./{argv[1]}/{contest}/{task}/{submission} - {status}{extension}"
            )
            filename.parent.mkdir(parents=True, exist_ok=True)
            print(filename)

            # Explicit UTF-8: submitted code may contain non-ASCII text that
            # the platform default encoding cannot always represent.
            with open(filename, "w", encoding="utf-8") as f:
                f.write(
                    soup.find("pre", id="submission-code").get_text(strip=True)
                )
            os.utime(filename, (when, when))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment