takahub1 · October 13, 2017 09:34
diff --git a/getImage.py b/getImage.py
 #-*- coding:utf-8 -*-

 import os
 import sys
 import time
 import bs4
 import urllib.request

 def crawring(url):
 	"""Download every ``data-background-image`` of a page and zip the result.

 	Fetches the HTML at *url*, derives a directory name from the page
 	``<title>``, downloads into that directory every URL found in a
 	``data-background-image`` attribute of a ``<div>`` (using ``wget``),
 	packs the directory into ``<name>.zip`` in the current working
 	directory and finally removes the directory.

 	Args:
 		url: Address of the page to crawl. Surrounding whitespace (such as
 			the newline left by ``readlines()``) is ignored.

 	Raises:
 		SystemExit: With status 1 when the HTML cannot be fetched or is empty.
 	"""
 	# Function-scope imports keep this block self-contained.
 	import shutil
 	import subprocess
 	from urllib.parse import urljoin

 	url = url.strip()

 	# Fetch the HTML of the given URL.
 	try:
 		html = get_html_string(url)
 	except (OSError, ValueError) as err:
 		# OSError covers urllib.error.URLError (a subclass) and socket errors;
 		# ValueError covers an unknown URL type and UnicodeDecodeError.
 		print(err, file=sys.stderr)
 		html = ""
 	if not html:
 		print("HTMLが取得できませんでした。")
 		print("URLを確認してください。")
 		sys.exit(1)

 	soup = bs4.BeautifulSoup(html, "lxml")
 	title_tag = soup.title
 	raw_title = title_tag.string if title_tag is not None else None
 	# The title comes from an untrusted page: neutralise whitespace and path
 	# separators so it is always a single path component.
 	book_title = "".join(
 		"_" if ch.isspace() or ch in "/\\" else ch
 		for ch in (raw_title or "")
 	)
 	if book_title in ("", ".", ".."):
 		# Never let the later rmtree() target the current or parent directory.
 		book_title = "untitled"
 	book_dir = os.path.join(".", book_title)
 	os.makedirs(book_dir, exist_ok=True)

 	for div_tag in soup.find_all("div"):
 		href_str = div_tag.get("data-background-image")
 		if href_str is None:
 			continue
 		print(href_str)
 		# Resolve relative references against the page URL; absolute URLs
 		# pass through unchanged.
 		image_url = urljoin(url, href_str)
 		# Argument list (no shell) so page content cannot inject commands.
 		# Download failures stay best-effort, as before.
 		subprocess.run(["wget", "-q", "-P", book_dir, image_url], check=False)

 	# Creates "<book_title>.zip" containing the "<book_title>/" directory.
 	shutil.make_archive(book_title, "zip", root_dir=".", base_dir=book_title)
 	shutil.rmtree(book_dir)

 def get_html_string(url):
 	"""Fetch *url* and return the response body decoded to text.

 	The request is sent with a desktop Firefox ``User-Agent`` header. The
 	body is decoded with the charset declared in the response's
 	``Content-Type`` header, falling back to UTF-8 when none is declared or
 	the declared codec is unknown to Python.

 	Args:
 		url: Address to fetch; anything ``urllib.request.urlopen`` accepts.

 	Returns:
 		The decoded response body as ``str``.

 	Raises:
 		Whatever ``urllib.request.urlopen`` raises for an unreachable or
 		malformed URL, and ``UnicodeDecodeError`` when the body does not
 		match the chosen charset.
 	"""
 	headers = {
 		"User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:47.0) Gecko/20100101 Firefox/47.0",
 	}

 	request = urllib.request.Request(url=url, headers=headers)
 	# The context manager closes the response even if read() raises.
 	with urllib.request.urlopen(request) as response:
 		raw_body = response.read()
 		charset = response.headers.get_content_charset() or "utf-8"

 	try:
 		return raw_body.decode(charset)
 	except LookupError:
 		# The server advertised a codec name Python does not know.
 		return raw_body.decode("utf-8")

 def main():
 	"""Crawl every URL listed in the file named by the sole CLI argument.

 	The file is read line by line; surrounding whitespace is stripped from
 	each line and blank lines are skipped, so a trailing newline never
 	becomes part of a URL.

 	Raises:
 		SystemExit: With status 1 when the argument count is wrong.
 	"""
 	# Validate arguments.
 	if len(sys.argv) != 2:
 		print("Usage: getImage.py URL_LIST_FILE", file=sys.stderr)
 		sys.exit(1)

 	# The context manager closes the file even if reading raises.
 	with open(sys.argv[1]) as url_file:
 		urls = [line.strip() for line in url_file]

 	for url in urls:
 		if url:
 			crawring(url)
 	
 # Run the crawler only when this file is executed as a script, not on import.
 if __name__ == "__main__":
 	main()
	#-*- coding:utf-8 -*-

	import os
	import sys
	import time
	import bs4
	import urllib.request

	def crawring(url):
		"""Download every ``data-background-image`` of a page and zip the result.

		Fetches the HTML at *url*, derives a directory name from the page
		``<title>``, downloads into that directory every URL found in a
		``data-background-image`` attribute of a ``<div>`` (using ``wget``),
		packs the directory into ``<name>.zip`` in the current working
		directory and finally removes the directory.

		Args:
			url: Address of the page to crawl. Surrounding whitespace (such as
				the newline left by ``readlines()``) is ignored.

		Raises:
			SystemExit: With status 1 when the HTML cannot be fetched or is empty.
		"""
		# Function-scope imports keep this block self-contained.
		import shutil
		import subprocess
		from urllib.parse import urljoin

		url = url.strip()

		# Fetch the HTML of the given URL.
		try:
			html = get_html_string(url)
		except (OSError, ValueError) as err:
			# OSError covers urllib.error.URLError (a subclass) and socket errors;
			# ValueError covers an unknown URL type and UnicodeDecodeError.
			print(err, file=sys.stderr)
			html = ""
		if not html:
			print("HTMLが取得できませんでした。")
			print("URLを確認してください。")
			sys.exit(1)

		soup = bs4.BeautifulSoup(html, "lxml")
		title_tag = soup.title
		raw_title = title_tag.string if title_tag is not None else None
		# The title comes from an untrusted page: neutralise whitespace and path
		# separators so it is always a single path component.
		book_title = "".join(
			"_" if ch.isspace() or ch in "/\\" else ch
			for ch in (raw_title or "")
		)
		if book_title in ("", ".", ".."):
			# Never let the later rmtree() target the current or parent directory.
			book_title = "untitled"
		book_dir = os.path.join(".", book_title)
		os.makedirs(book_dir, exist_ok=True)

		for div_tag in soup.find_all("div"):
			href_str = div_tag.get("data-background-image")
			if href_str is None:
				continue
			print(href_str)
			# Resolve relative references against the page URL; absolute URLs
			# pass through unchanged.
			image_url = urljoin(url, href_str)
			# Argument list (no shell) so page content cannot inject commands.
			# Download failures stay best-effort, as before.
			subprocess.run(["wget", "-q", "-P", book_dir, image_url], check=False)

		# Creates "<book_title>.zip" containing the "<book_title>/" directory.
		shutil.make_archive(book_title, "zip", root_dir=".", base_dir=book_title)
		shutil.rmtree(book_dir)

	def get_html_string(url):
		"""Fetch *url* and return the response body decoded to text.

		The request is sent with a desktop Firefox ``User-Agent`` header. The
		body is decoded with the charset declared in the response's
		``Content-Type`` header, falling back to UTF-8 when none is declared or
		the declared codec is unknown to Python.

		Args:
			url: Address to fetch; anything ``urllib.request.urlopen`` accepts.

		Returns:
			The decoded response body as ``str``.

		Raises:
			Whatever ``urllib.request.urlopen`` raises for an unreachable or
			malformed URL, and ``UnicodeDecodeError`` when the body does not
			match the chosen charset.
		"""
		headers = {
			"User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:47.0) Gecko/20100101 Firefox/47.0",
		}

		request = urllib.request.Request(url=url, headers=headers)
		# The context manager closes the response even if read() raises.
		with urllib.request.urlopen(request) as response:
			raw_body = response.read()
			charset = response.headers.get_content_charset() or "utf-8"

		try:
			return raw_body.decode(charset)
		except LookupError:
			# The server advertised a codec name Python does not know.
			return raw_body.decode("utf-8")

	def main():
		"""Crawl every URL listed in the file named by the sole CLI argument.

		The file is read line by line; surrounding whitespace is stripped from
		each line and blank lines are skipped, so a trailing newline never
		becomes part of a URL.

		Raises:
			SystemExit: With status 1 when the argument count is wrong.
		"""
		# Validate arguments.
		if len(sys.argv) != 2:
			print("Usage: getImage.py URL_LIST_FILE", file=sys.stderr)
			sys.exit(1)

		# The context manager closes the file even if reading raises.
		with open(sys.argv[1]) as url_file:
			urls = [line.strip() for line in url_file]

		for url in urls:
			if url:
				crawring(url)

	# Run the crawler only when this file is executed as a script, not on import.
	if __name__ == "__main__":
		main()