ToniRV · April 30, 2020 03:43
diff --git a/parse_springer.py b/parse_springer.py
 #!/usr/bin/env python3

 import xml.etree.ElementTree as ET
 from lxml import html
 import requests

 import os

 # Folder the PDFs are saved into; created up front so the first write cannot
 # fail on a missing directory.
 DIRECTORY = '/home/tonirv/Downloads/SpringerBooks/'
 os.makedirs(DIRECTORY, exist_ok=True)

 # XML book list; it is queried below through the OpenDocument namespaces.
 tree = ET.parse('Springer_Ebooks.xml')
 root = tree.getroot()

 ns = {"office": "urn:oasis:names:tc:opendocument:xmlns:office:1.0",
       "draw": "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0",
       "text": "urn:oasis:names:tc:opendocument:xmlns:text:1.0"}

 # Collect every <draw:text-box>; the children of its first <text:p> are
 # scanned for title pieces and for the book URL.
 entries = root.findall(".//draw:text-box", ns)
 name = ''
 for entry in entries:
     paragraph = entry.find(".//text:p", ns)
     if paragraph is None:
         # Text box without a paragraph: nothing to read here.
         continue

     for span in paragraph:
         #print(span.tag, span.attrib, span.text)
         url = ''
         for value in span.attrib.values():
             # An attribute value of 'T2' marks a piece of the title
             # (presumably the text style name -- confirm against the XML).
             # Pieces accumulate in `name` until a URL is found.
             if value == 'T2' and span.text is not None:
                 name += str(span.text) + '_'
             # An attribute value of 'T3' marks the span holding the URL.
             if value == 'T3':
                 url = span.text

         # Compare by truthiness, not identity: this also skips a 'T3' span
         # whose text is None instead of passing None to requests.
         if not url:
             continue

         # Download!!
         print ("DOWNLOADING: ", name)
         print ("from: ", url)

         # GET
         r = requests.get(url)

         # Response, status etc
         print(r.status_code)

         page = html.fromstring(r.text)

         link_element = page.xpath('//a[@title="Download this book in PDF format"]')

         if not link_element:
             # No PDF link on this page: report it and carry on with the
             # next book instead of dying on link_element[0].
             print("No PDF download link found, skipping: ", name)
         else:
             # A '/' inside a title would be taken as a directory separator.
             target = os.path.join(DIRECTORY, name.replace('/', '-') + '.pdf')
             for (element, attribute, link, pos) in link_element[0].iterlinks():
                 # Drop leading slashes so the joined URL has a single '/'.
                 download_url = 'http://link.springer.com/%s' % link.lstrip('/')
                 print("Download from: %s. Saving to: %s" % (download_url, name))
                 pdf = requests.get(download_url)
                 if pdf.status_code != 200:
                     # Do not store an error page under a .pdf name.
                     print("Download failed with status: ", pdf.status_code)
                     continue
                 with open(target, 'wb') as out:
                     out.write(pdf.content)

         # Start collecting the next book's title from scratch.
         name = ''
	#!/usr/bin/env python3

	import xml.etree.ElementTree as ET
	from lxml import html
	import requests

	import os

	# Folder the PDFs are saved into; created up front so the first write cannot
	# fail on a missing directory.
	DIRECTORY = '/home/tonirv/Downloads/SpringerBooks/'
	os.makedirs(DIRECTORY, exist_ok=True)

	# XML book list; it is queried below through the OpenDocument namespaces.
	tree = ET.parse('Springer_Ebooks.xml')
	root = tree.getroot()

	ns = {"office": "urn:oasis:names:tc:opendocument:xmlns:office:1.0",
	      "draw": "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0",
	      "text": "urn:oasis:names:tc:opendocument:xmlns:text:1.0"}

	# Collect every <draw:text-box>; the children of its first <text:p> are
	# scanned for title pieces and for the book URL.
	entries = root.findall(".//draw:text-box", ns)
	name = ''
	for entry in entries:
	    paragraph = entry.find(".//text:p", ns)
	    if paragraph is None:
	        # Text box without a paragraph: nothing to read here.
	        continue

	    for span in paragraph:
	        #print(span.tag, span.attrib, span.text)
	        url = ''
	        for value in span.attrib.values():
	            # An attribute value of 'T2' marks a piece of the title
	            # (presumably the text style name -- confirm against the XML).
	            # Pieces accumulate in `name` until a URL is found.
	            if value == 'T2' and span.text is not None:
	                name += str(span.text) + '_'
	            # An attribute value of 'T3' marks the span holding the URL.
	            if value == 'T3':
	                url = span.text

	        # Compare by truthiness, not identity: this also skips a 'T3' span
	        # whose text is None instead of passing None to requests.
	        if not url:
	            continue

	        # Download!!
	        print ("DOWNLOADING: ", name)
	        print ("from: ", url)

	        # GET
	        r = requests.get(url)

	        # Response, status etc
	        print(r.status_code)

	        page = html.fromstring(r.text)

	        link_element = page.xpath('//a[@title="Download this book in PDF format"]')

	        if not link_element:
	            # No PDF link on this page: report it and carry on with the
	            # next book instead of dying on link_element[0].
	            print("No PDF download link found, skipping: ", name)
	        else:
	            # A '/' inside a title would be taken as a directory separator.
	            target = os.path.join(DIRECTORY, name.replace('/', '-') + '.pdf')
	            for (element, attribute, link, pos) in link_element[0].iterlinks():
	                # Drop leading slashes so the joined URL has a single '/'.
	                download_url = 'http://link.springer.com/%s' % link.lstrip('/')
	                print("Download from: %s. Saving to: %s" % (download_url, name))
	                pdf = requests.get(download_url)
	                if pdf.status_code != 200:
	                    # Do not store an error page under a .pdf name.
	                    print("Download failed with status: ", pdf.status_code)
	                    continue
	                with open(target, 'wb') as out:
	                    out.write(pdf.content)

	        # Start collecting the next book's title from scratch.
	        name = ''