bielfrontera · September 19, 2017 09:33 · dubirajara · Oct 15, 2016 · dtrillo · Oct 25, 2016
diff --git a/oreilly_free_ebooks.py b/oreilly_free_ebooks.py
 # coding: utf-8
 import os
 import shutil
 import requests
 import re

 # Topic slugs whose free-ebook index pages are scraped; each one is
 # substituted into the 'http://www.oreilly.com/{topic}/free/' URLs below.
 TOPIC_LIST = [
    'programming', 'web-platform', 'security', 'iot',
    'data', 'business', 'webops-perf',
 ]


 def download_free_ebooks(ebook_topic, ebook_formats=('epub', 'mobi', 'pdf')):
    """Download every free ebook of a topic in each requested format.

    Files are written into a directory named ``ebook_topic``, which is
    created when it does not exist yet.

    :param ebook_topic: topic slug, e.g. ``'programming'``.
    :param ebook_formats: iterable of file extensions to fetch per ebook.
    :raises OSError: if the directory cannot be created.
    """
    # Materialise once: the formats are re-iterated for every ebook, so a
    # one-shot iterator would otherwise be exhausted after the first book.
    ebook_formats = tuple(ebook_formats)
    try:
        os.mkdir(ebook_topic)
    except OSError:
        # An already existing directory is fine; anything else is an error.
        if not os.path.isdir(ebook_topic):
            raise
    for ebook in get_free_ebook_list(ebook_topic):
        for ebook_format in ebook_formats:
            download_ebook(ebook_topic, ebook, ebook_format)


 def download_ebook(ebook_topic, ebook_slug, ebook_format):
    """Download one ebook file and save it under the topic directory.

    The URL and the target filename are derived from the topic, slug and
    format. When the server does not answer with an OK status, nothing is
    written and no error is raised.

    :param ebook_topic: topic slug the ebook belongs to.
    :param ebook_slug: ebook identifier as found on the topic index page.
    :param ebook_format: file extension to fetch, e.g. ``'pdf'``.
    """
    ebook_url = get_ebook_url(ebook_topic, ebook_slug, ebook_format)
    ebook_filename = get_ebook_filename(ebook_topic, ebook_slug, ebook_format)
    r = requests.get(ebook_url, stream=True)
    try:
        if r.ok:
            # The raw stream is not content-decoded by default; enable it so
            # gzip/deflate-encoded responses are not saved compressed.
            r.raw.decode_content = True
            with open(ebook_filename, 'wb') as out_file:
                shutil.copyfileobj(r.raw, out_file)
    finally:
        # Release the streamed connection even if writing the file fails.
        r.close()


 def get_free_ebook_list(ebook_topic):
    """Return the ebook slugs listed on the free-ebooks page of a topic.

    An empty list is returned when the index page request is not OK.
    """
    response = requests.get(
        'http://www.oreilly.com/{topic}/free/'.format(topic=ebook_topic)
    )
    if not response.ok:
        return []
    return get_ebook_list_from_content(ebook_topic, response.content)


 def get_ebook_list_from_content(ebook_topic, html_content):
    """Extract the ebook slugs linked from a topic index page.

    A slug is the part of a ``http://www.oreilly.com/<topic>/free/<slug>.csp``
    link between ``free/`` and ``.csp``.

    :param ebook_topic: topic slug the links must belong to.
    :param html_content: page markup, as text or as raw bytes.
    :returns: list of unique slugs in order of first appearance.
    """
    # ``requests`` hands over bytes; a text pattern cannot be applied to
    # bytes on Python 3, so normalise to text first.
    if isinstance(html_content, bytes):
        html_content = html_content.decode('utf-8', 'replace')
    # Dots and the topic are escaped so they only match literally.
    pattern = r'http://www\.oreilly\.com/{topic}/free/[\'"]?([^\'" >]+)\.csp'.format(
        topic=re.escape(ebook_topic)
    )
    book_slugs = []
    seen = set()
    # The same link may occur several times on a page; keep the first
    # occurrence only so that no ebook is downloaded more than once.
    for slug in re.findall(pattern, html_content):
        if slug not in seen:
            seen.add(slug)
            book_slugs.append(slug)
    return book_slugs


 def get_ebook_url(ebook_topic, ebook_slug, ebook_format):
    """Return the download URL of an ebook file for the given format."""
    url_parts = {
        'topic': ebook_topic,
        'slug': ebook_slug,
        'ext': ebook_format,
    }
    return "http://www.oreilly.com/{topic}/free/files/{slug}.{ext}".format(**url_parts)


 def get_ebook_filename(ebook_topic, ebook_slug, ebook_format):
    """Return the relative path ``<topic>/<slug>.<ext>`` an ebook is saved to."""
    filename_template = "{topic}/{slug}.{ext}"
    return filename_template.format(
        topic=ebook_topic, slug=ebook_slug, ext=ebook_format
    )


 # Script entry point: when executed directly (not imported), download the
 # free ebooks of every topic in TOPIC_LIST using the default formats.
 if __name__ == "__main__":
    for topic in TOPIC_LIST:
        download_free_ebooks(topic)
	# coding: utf-8
	import os
	import shutil
	import requests
	import re

	# Topic slugs whose free-ebook index pages are scraped; each one is
	# substituted into the 'http://www.oreilly.com/{topic}/free/' URLs below.
	TOPIC_LIST = [
	    'programming', 'web-platform', 'security', 'iot',
	    'data', 'business', 'webops-perf',
	]


	def download_free_ebooks(ebook_topic, ebook_formats=('epub', 'mobi', 'pdf')):
	    """Download every free ebook of a topic in each requested format.

	    Files are written into a directory named ``ebook_topic``, which is
	    created when it does not exist yet.

	    :param ebook_topic: topic slug, e.g. ``'programming'``.
	    :param ebook_formats: iterable of file extensions to fetch per ebook.
	    :raises OSError: if the directory cannot be created.
	    """
	    # Materialise once: the formats are re-iterated for every ebook, so a
	    # one-shot iterator would otherwise be exhausted after the first book.
	    ebook_formats = tuple(ebook_formats)
	    try:
	        os.mkdir(ebook_topic)
	    except OSError:
	        # An already existing directory is fine; anything else is an error.
	        if not os.path.isdir(ebook_topic):
	            raise
	    for ebook in get_free_ebook_list(ebook_topic):
	        for ebook_format in ebook_formats:
	            download_ebook(ebook_topic, ebook, ebook_format)


	def download_ebook(ebook_topic, ebook_slug, ebook_format):
	    """Download one ebook file and save it under the topic directory.

	    The URL and the target filename are derived from the topic, slug and
	    format. When the server does not answer with an OK status, nothing is
	    written and no error is raised.

	    :param ebook_topic: topic slug the ebook belongs to.
	    :param ebook_slug: ebook identifier as found on the topic index page.
	    :param ebook_format: file extension to fetch, e.g. ``'pdf'``.
	    """
	    ebook_url = get_ebook_url(ebook_topic, ebook_slug, ebook_format)
	    ebook_filename = get_ebook_filename(ebook_topic, ebook_slug, ebook_format)
	    r = requests.get(ebook_url, stream=True)
	    try:
	        if r.ok:
	            # The raw stream is not content-decoded by default; enable it so
	            # gzip/deflate-encoded responses are not saved compressed.
	            r.raw.decode_content = True
	            with open(ebook_filename, 'wb') as out_file:
	                shutil.copyfileobj(r.raw, out_file)
	    finally:
	        # Release the streamed connection even if writing the file fails.
	        r.close()


	def get_free_ebook_list(ebook_topic):
	    """Return the ebook slugs listed on the free-ebooks page of a topic.

	    An empty list is returned when the index page request is not OK.

	    :param ebook_topic: topic slug, e.g. ``'programming'``.
	    """
	    ebook_list = []
	    index_url = 'http://www.oreilly.com/{topic}/free/'.format(topic=ebook_topic)
	    r = requests.get(index_url)
	    if r.ok:
	        ebook_list = get_ebook_list_from_content(ebook_topic, r.content)
	    return ebook_list


	def get_ebook_list_from_content(ebook_topic, html_content):
	    """Extract the ebook slugs linked from a topic index page.

	    A slug is the part of a ``http://www.oreilly.com/<topic>/free/<slug>.csp``
	    link between ``free/`` and ``.csp``.

	    :param ebook_topic: topic slug the links must belong to.
	    :param html_content: page markup, as text or as raw bytes.
	    :returns: list of unique slugs in order of first appearance.
	    """
	    # ``requests`` hands over bytes; a text pattern cannot be applied to
	    # bytes on Python 3, so normalise to text first.
	    if isinstance(html_content, bytes):
	        html_content = html_content.decode('utf-8', 'replace')
	    # Dots and the topic are escaped so they only match literally.
	    pattern = r'http://www\.oreilly\.com/{topic}/free/[\'"]?([^\'" >]+)\.csp'.format(
	        topic=re.escape(ebook_topic)
	    )
	    book_slugs = []
	    seen = set()
	    # The same link may occur several times on a page; keep the first
	    # occurrence only so that no ebook is downloaded more than once.
	    for slug in re.findall(pattern, html_content):
	        if slug not in seen:
	            seen.add(slug)
	            book_slugs.append(slug)
	    return book_slugs


	def get_ebook_url(ebook_topic, ebook_slug, ebook_format):
	    """Return the download URL of an ebook file for the given format.

	    :param ebook_topic: topic slug the ebook belongs to.
	    :param ebook_slug: ebook identifier.
	    :param ebook_format: file extension, e.g. ``'pdf'``.
	    """
	    ebook_url = "http://www.oreilly.com/{topic}/free/files/{slug}.{ext}".format(
	        topic=ebook_topic,
	        slug=ebook_slug,
	        ext=ebook_format
	    )
	    return ebook_url


	def get_ebook_filename(ebook_topic, ebook_slug, ebook_format):
	    """Return the relative path ``<topic>/<slug>.<ext>`` an ebook is saved to.

	    :param ebook_topic: topic slug, used as the directory name.
	    :param ebook_slug: ebook identifier, used as the file base name.
	    :param ebook_format: file extension, e.g. ``'pdf'``.
	    """
	    ebook_fn = "{topic}/{slug}.{ext}".format(
	        topic=ebook_topic,
	        slug=ebook_slug,
	        ext=ebook_format
	    )
	    return ebook_fn


	# Script entry point: when executed directly (not imported), download the
	# free ebooks of every topic in TOPIC_LIST using the default formats.
	if __name__ == "__main__":
	    for topic in TOPIC_LIST:
	        download_free_ebooks(topic)