Created
February 21, 2021 15:59
-
-
Save skyleaworlder/722e35f22cd61ee61d3cc1c6e6ebfaae to your computer and use it in GitHub Desktop.
jb51(脚本之家) pdf 网页存活情况检查 (非百度云链接检查)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
import sys
import urllib

import requests
def getInfo(book_id, timeout=10):
    """Fetch the jb51 book page for *book_id* and extract its title.

    The page at ``https://www.jb51.net/books/<book_id>.html`` is downloaded,
    decoded as GBK, and searched for the first
    ``<h1 itemprop="name">...</h1>`` element.

    Args:
        book_id: Identifier inserted into the book page URL.
        timeout: Seconds to wait for the server before giving up, so that a
            single unresponsive page cannot stall a whole scan.

    Returns:
        A dict ``{"id": book_id, "name": <title>}``. ``name`` is the empty
        string when the request fails or the page has no title heading.
    """
    url = "https://www.jb51.net/books/" + str(book_id) + ".html"
    try:
        res = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # An unreachable page is reported the same way as a page without a
        # title, so one network error does not abort the caller's loop.
        return {"id": book_id, "name": ""}

    # The response text is decoded as GBK rather than the detected encoding.
    res.encoding = "GBK"
    match = re.search(r'<h1 itemprop="name">(.*?)</h1>', res.text)
    return {"id": book_id, "name": match.group(1) if match else ""}
def scan(beg, end):
    """Check every book id in ``[beg, end)`` and collect the ones with a title.

    Each id is passed to ``getInfo``; a progress line is printed for every
    id, marked ``[success]`` with the book name or ``[failed_]`` with ``[x]``.

    Args:
        beg: First id to check (inclusive); coerced with ``int``.
        end: Id to stop at (exclusive); coerced with ``int``.

    Returns:
        A list of the ``getInfo`` result dicts whose ``"name"`` is non-empty,
        in ascending id order. Empty when ``end <= beg``.
    """
    # Coerce once so the total and the loop range are computed from the same
    # values (string arguments previously failed on the subtraction).
    beg, end = int(beg), int(end)
    total = end - beg
    found = []
    # Count from 1 so the last progress line reads total/total.
    for done, idx in enumerate(range(beg, end), start=1):
        info = getInfo(idx)
        if info["name"]:
            found.append(info)
            print("[success]({}/{}): {}".format(done, total, info["name"]))
        else:
            print("[failed_]({}/{}): [x]".format(done, total))
    return found
def logoutput(res, path="book.log"):
    """Write scan results to a UTF-8 text file, one ``id -> name`` per line.

    Args:
        res: Iterable of dicts with an ``"id"`` and a string ``"name"`` entry.
        path: Destination file; defaults to ``book.log`` in the current
            working directory. An existing file is overwritten.
    """
    lines = [str(elem["id"]) + " -> " + elem["name"] + "\n" for elem in res]
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(lines)
if __name__ == "__main__":
    # Command line: <script> BEG END — scan book ids in [BEG, END) and write
    # the pages that have a title to the log file.
    usage = "usage: " + sys.argv[0] + " BEG END  (integer book ids)"
    if len(sys.argv) < 3:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit(usage)
    try:
        beg = int(sys.argv[1])
        end = int(sys.argv[2])
    except ValueError:
        sys.exit(usage)

    print("Begin to scan:")
    res = scan(beg, end)
    logoutput(res)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment