Skip to content

Instantly share code, notes, and snippets.

@ToniRV
Created April 30, 2020 03:43
Show Gist options
  • Save ToniRV/726b55de83f6c3666460bc039e3cf78c to your computer and use it in GitHub Desktop.
Save ToniRV/726b55de83f6c3666460bc039e3cf78c to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import xml.etree.ElementTree as ET
from lxml import html
import requests
# Folder the downloaded PDFs are written to.
DIRECTORY = '/home/tonirv/Downloads/SpringerBooks/'
# Catalogue file that is parsed for book names and URLs.
XML_CATALOG = 'Springer_Ebooks.xml'

# Namespace prefixes used by the ElementTree queries below.
ns = {
    "office": "urn:oasis:names:tc:opendocument:xmlns:office:1.0",
    "draw": "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0",
    "text": "urn:oasis:names:tc:opendocument:xmlns:text:1.0",
}


def extract_books(root):
    """Yield ``(name, url)`` pairs found in the text boxes below *root*.

    Every ``draw:text-box`` is searched for its first ``text:p``; the
    children of that paragraph are scanned in document order:

    * a child with an attribute value of ``'T2'`` and non-empty text
      appends ``text + '_'`` to the name being built;
    * a child with an attribute value of ``'T3'`` supplies the URL, which
      completes the pair and resets the name.

    Text boxes without a paragraph and ``'T3'`` children without text are
    skipped instead of raising.

    NOTE(review): name fragments are carried over from one text box to the
    next until a URL shows up. This mirrors how the original flat script
    appears to behave -- confirm against the real catalogue file.

    Args:
        root: ElementTree element to search.

    Yields:
        Tuples ``(name, url)`` of strings.
    """
    name = ''
    for entry in root.findall(".//draw:text-box", ns):
        paragraph = entry.find(".//text:p", ns)
        if paragraph is None:
            # find() returns None when the box holds no paragraph.
            continue
        for span in paragraph:
            url = ''
            for value in span.attrib.values():
                if value == 'T2' and span.text is not None:
                    name += span.text + '_'
                if value == 'T3':
                    url = span.text
            # Truthiness also rejects None (a 'T3' element without text).
            if url:
                yield name, url
                name = ''


def download_book(name, url):
    """Download the PDF linked from the page at *url* as ``<name>.pdf``.

    The page is fetched, the first anchor titled "Download this book in PDF
    format" is looked up, and every link reported by ``iterlinks()`` on
    that anchor is requested and written to the same target file inside
    ``DIRECTORY`` (a later link overwrites an earlier one).

    Args:
        name: File name stem for the saved PDF.
        url: Address of the page that contains the download anchor.

    Returns:
        True if at least one response was written to disk, else False.
    """
    print("DOWNLOADING: ", name)
    print("from: ", url)
    page = requests.get(url)
    print(page.status_code)
    if not page.ok:
        print("error fetching book page")
        return False

    anchors = html.fromstring(page.text).xpath(
        '//a[@title="Download this book in PDF format"]')
    if not anchors:
        print("no PDF download link found")
        return False

    # A '/' inside the name would be read as a directory separator by open().
    target = DIRECTORY + '/' + name.replace('/', '-') + '.pdf'
    saved = False
    for _element, _attribute, link, _pos in anchors[0].iterlinks():
        download_url = 'http://link.springer.com/%s' % link
        print("Download from: %s. Saving to: %s" % (download_url, name))
        pdf = requests.get(download_url)
        if not pdf.ok:
            # Do not store an HTTP error body under a .pdf name.
            print("error downloading PDF: %s" % pdf.status_code)
            continue
        with open(target, 'wb') as out:
            out.write(pdf.content)
        saved = True
    return saved


def main():
    """Parse ``XML_CATALOG`` and download every book found in it."""
    root = ET.parse(XML_CATALOG).getroot()
    for name, url in extract_books(root):
        download_book(name, url)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment