cacharle · June 27, 2020 07:24 · cacharle · Jun 27, 2020 · cacharle · Jun 27, 2020
diff --git a/scrape_ncurses_howto.py b/scrape_ncurses_howto.py
 import os
 import requests
 from bs4 import BeautifulSoup

 # Mirror the NCURSES Programming HOWTO: save the index page, then every page
 # linked from its table of contents, as files inside `dir_name`.
 url_base = "http://tldp.org/HOWTO/NCURSES-Programming-HOWTO"
 dir_name = "ncurses_howto"
 # Seconds to wait on each request; without a timeout requests can block forever.
 request_timeout = 30

 # Fetch the index page. Any status other than 200 aborts the run.
 respond = requests.get(url_base, timeout=request_timeout)
 if respond.status_code != 200:
     raise IOError(f"GET {url_base} returned HTTP {respond.status_code}")
 content = respond.content

 # An already existing directory is fine (re-runs), but real failures such as
 # missing permissions must surface instead of being silently swallowed.
 os.makedirs(dir_name, exist_ok=True)

 with open(os.path.join(dir_name, "index.html"), "wb") as f:
     f.write(content)

 soup = BeautifulSoup(content, "html.parser")

 # The chapter links live in the <dl> of the first element with class "TOC".
 toc = soup.find(class_="TOC")
 if toc is None or toc.dl is None:
     raise IOError(f"no table of contents found in {url_base}")

 # href=True skips anchors that carry no href attribute at all.
 a_tags = toc.dl.find_all("a", href=True)

 # Links containing "#" point into a page rather than at a page of their own,
 # so they are skipped. URLs always use "/" as separator, hence the explicit
 # join instead of the platform-dependent os.path.join.
 links = [f"{url_base}/{a['href']}" for a in a_tags if "#" not in a["href"]]

 for link in links:
     print(f"requesting {link}")
     respond = requests.get(link, timeout=request_timeout)
     if respond.status_code != 200:
         raise IOError(f"GET {link} returned HTTP {respond.status_code}")
     # The local file is named after the last path segment of the URL.
     file_name = os.path.join(dir_name, link.rsplit("/", 1)[-1])
     with open(file_name, "wb") as f:
         f.write(respond.content)
     print(f"{link} saved to {file_name}")
	import os
	import requests
	from bs4 import BeautifulSoup

	# Mirror the NCURSES Programming HOWTO: save the index page, then every page
	# linked from its table of contents, as files inside `dir_name`.
	url_base = "http://tldp.org/HOWTO/NCURSES-Programming-HOWTO"
	dir_name = "ncurses_howto"
	# Seconds to wait on each request; without a timeout requests can block forever.
	request_timeout = 30

	# Fetch the index page. Any status other than 200 aborts the run.
	respond = requests.get(url_base, timeout=request_timeout)
	if respond.status_code != 200:
		raise IOError(f"GET {url_base} returned HTTP {respond.status_code}")
	content = respond.content

	# An already existing directory is fine (re-runs), but real failures such as
	# missing permissions must surface instead of being silently swallowed.
	os.makedirs(dir_name, exist_ok=True)

	with open(os.path.join(dir_name, "index.html"), "wb") as f:
		f.write(content)

	soup = BeautifulSoup(content, "html.parser")

	# The chapter links live in the <dl> of the first element with class "TOC".
	toc = soup.find(class_="TOC")
	if toc is None or toc.dl is None:
		raise IOError(f"no table of contents found in {url_base}")

	# href=True skips anchors that carry no href attribute at all.
	a_tags = toc.dl.find_all("a", href=True)

	# Links containing "#" point into a page rather than at a page of their own,
	# so they are skipped. URLs always use "/" as separator, hence the explicit
	# join instead of the platform-dependent os.path.join.
	links = [f"{url_base}/{a['href']}" for a in a_tags if "#" not in a["href"]]

	for link in links:
		print(f"requesting {link}")
		respond = requests.get(link, timeout=request_timeout)
		if respond.status_code != 200:
			raise IOError(f"GET {link} returned HTTP {respond.status_code}")
		# The local file is named after the last path segment of the URL.
		file_name = os.path.join(dir_name, link.rsplit("/", 1)[-1])
		with open(file_name, "wb") as f:
			f.write(respond.content)
		print(f"{link} saved to {file_name}")