aaronsaderholm · July 19, 2017 14:48
diff --git a/pull_links_from_html.py b/pull_links_from_html.py
 #!/usr/bin/env python
 from bs4 import BeautifulSoup
 import sys

 # Extract the href of every <a> tag in an HTML file.
 #
 # Usage: pull_links_from_html.py INPUT_HTML OUTPUT_FILE
 #
 # Each link is echoed to stdout and then written, one per line, to
 # OUTPUT_FILE (which is overwritten).
 if len(sys.argv) < 3:
         # Exit with a readable message instead of a bare IndexError.
         sys.exit("usage: %s INPUT_HTML OUTPUT_FILE" % sys.argv[0])

 input_file = sys.argv[1]
 output_file = sys.argv[2]

 # The context manager closes the file even if reading raises.
 with open(input_file, "r") as ifile:
         html = ifile.read()

 # Name the parser explicitly so the result does not depend on which
 # optional parser libraries happen to be installed.
 soup = BeautifulSoup(html, "html.parser")

 links = []
 for link in soup.find_all('a'):
         url = link.get('href')
         if url is None:
                 # Anchors without an href (e.g. <a name="top">) carry no
                 # link; keeping None would break the string write below.
                 continue
         print(url)
         links.append(url)

 with open(output_file, "w") as ofile:
         ofile.writelines(url + "\n" for url in links)
	#!/usr/bin/env python
	from bs4 import BeautifulSoup
	import sys

	# Extract the href of every <a> tag in an HTML file.
	#
	# Usage: pull_links_from_html.py INPUT_HTML OUTPUT_FILE
	#
	# Each link is echoed to stdout and then written, one per line, to
	# OUTPUT_FILE (which is overwritten).
	if len(sys.argv) < 3:
		# Exit with a readable message instead of a bare IndexError.
		sys.exit("usage: %s INPUT_HTML OUTPUT_FILE" % sys.argv[0])

	input_file = sys.argv[1]
	output_file = sys.argv[2]

	# The context manager closes the file even if reading raises.
	with open(input_file, "r") as ifile:
		html = ifile.read()

	# Name the parser explicitly so the result does not depend on which
	# optional parser libraries happen to be installed.
	soup = BeautifulSoup(html, "html.parser")

	links = []
	for link in soup.find_all('a'):
		url = link.get('href')
		if url is None:
			# Anchors without an href (e.g. <a name="top">) carry no
			# link; keeping None would break the string write below.
			continue
		print(url)
		links.append(url)

	with open(output_file, "w") as ofile:
		ofile.writelines(url + "\n" for url in links)