sdomi · July 25, 2019 20:23
diff --git a/gistfile1.txt b/gistfile1.txt
 #!/bin/python

 from bs4 import BeautifulSoup
 import urllib.request
 import sys
 import re
 import os
 # Thread URL to start from, given as the first command-line argument.
 if len(sys.argv) < 2:
     sys.exit('Usage: '+sys.argv[0]+' <thread-url>')
 arg = sys.argv[1]

 # Install a global opener with a browser-like User-Agent so that both
 # urlopen() and urlretrieve() below send it with every request.
 opener = urllib.request.build_opener()
 opener.addheaders = [('User-Agent', 'Mozilla/5.0')]
 urllib.request.install_opener(opener)

 # Each pass downloads the images of one thread and then tries to hop to an
 # older thread referenced from it; the loop ends when no such link exists.
 while True:

     # Pull the numeric thread id and the board name out of the URL. Matching
     # digits only keeps slugs, ".html" suffixes and "#p123" fragments out of
     # the directory name and out of the int() comparison further down.
     thread_match = re.search(r'(?<=thread/)\d+', arg)
     board_match = re.search(r'(?<=\.org/)[^/\s]*', arg)
     if thread_match is None or board_match is None:
         sys.exit('Could not find a board and thread number in '+arg)
     dirname = thread_match.group(0)
     boardname = board_match.group(0)

     req = urllib.request.Request(arg)
     try:
         with urllib.request.urlopen(req) as response:
             soup = BeautifulSoup(response.read(), features="html.parser")
     except urllib.error.HTTPError:
         # Any HTTP error status lands here, not only 404. Retry once against
         # the archive; a failure of that request is left to propagate.
         print("404, Trying archive.nyafuu.org...")
         req = urllib.request.Request('https://archive.nyafuu.org/'+boardname+'/thread/'+dirname)
         with urllib.request.urlopen(req) as response:
             soup = BeautifulSoup(response.read(), features="html.parser")

     # Images are saved into a directory named after the thread number.
     try:
         os.mkdir(dirname)
     except FileExistsError:
         print('Directory '+dirname+' already exists, not creating...')

     # Anchors carrying either class are treated as links to full-size images
     # (presumably one class per site layout -- verify against the pages).
     image_links = (soup.find_all("a", {"class": "fileThumb"})
                    + soup.find_all("a", {"class": "thread_image_link"}))
     for link in image_links:
         href = link['href']
         # Scheme-less hrefs ("//host/path") get an explicit scheme. Testing
         # the prefix, rather than searching the whole string, keeps a path
         # that merely contains "http" from being mistaken for a full URL.
         if href.startswith('http'):
             url = href
         else:
             url = 'http:'+href
         filename = url.rsplit('/', 1)[-1]
         target = dirname+'/'+filename
         if not os.path.isfile(target):
             print('Downloading '+filename+'...')
             urllib.request.urlretrieve(url, target)
         else:
             print('Skipping '+filename+', because it already exists.')

     found = False
     quote_links = (soup.find_all("span", {"class": "deadlink"})
                    + soup.find_all("a", {"class": "quotelink"})
                    + soup.find_all("a", {"class": "backlink"}))
     for link in quote_links:
         # Only plain post references of the form ">>12345" are considered.
         # Anything else (for example a cross-board ">>>/g/12345" link) is
         # skipped instead of crashing int() with a ValueError.
         quote = re.match(r'>>(\d+)', link.text.strip())
         if quote is None:
             continue
         post_number = quote.group(1)

         # A referenced post numbered below the current thread is taken to
         # be the previous thread.
         if int(post_number) < int(dirname):
             print('Found previous thread, following the link')
             found = True
             # Cut the URL at the start of the thread number so that any
             # suffix after it is not glued onto the new number.
             arg = arg[:thread_match.start()]+post_number
             break

     if not found:
         print('Previous thread not found. Exitting..')
         break
	#!/bin/python

	from bs4 import BeautifulSoup
	import urllib.request
	import sys
	import re
	import os
	# Thread URL to start from, given as the first command-line argument.
	if len(sys.argv) < 2:
	    sys.exit('Usage: '+sys.argv[0]+' <thread-url>')
	arg = sys.argv[1]

	# Install a global opener with a browser-like User-Agent so that both
	# urlopen() and urlretrieve() below send it with every request.
	opener = urllib.request.build_opener()
	opener.addheaders = [('User-Agent', 'Mozilla/5.0')]
	urllib.request.install_opener(opener)

	# Each pass downloads the images of one thread and then tries to hop to an
	# older thread referenced from it; the loop ends when no such link exists.
	while True:

	    # Pull the numeric thread id and the board name out of the URL. Matching
	    # digits only keeps slugs, ".html" suffixes and "#p123" fragments out of
	    # the directory name and out of the int() comparison further down.
	    thread_match = re.search(r'(?<=thread/)\d+', arg)
	    board_match = re.search(r'(?<=\.org/)[^/\s]*', arg)
	    if thread_match is None or board_match is None:
	        sys.exit('Could not find a board and thread number in '+arg)
	    dirname = thread_match.group(0)
	    boardname = board_match.group(0)

	    req = urllib.request.Request(arg)
	    try:
	        with urllib.request.urlopen(req) as response:
	            soup = BeautifulSoup(response.read(), features="html.parser")
	    except urllib.error.HTTPError:
	        # Any HTTP error status lands here, not only 404. Retry once against
	        # the archive; a failure of that request is left to propagate.
	        print("404, Trying archive.nyafuu.org...")
	        req = urllib.request.Request('https://archive.nyafuu.org/'+boardname+'/thread/'+dirname)
	        with urllib.request.urlopen(req) as response:
	            soup = BeautifulSoup(response.read(), features="html.parser")

	    # Images are saved into a directory named after the thread number.
	    try:
	        os.mkdir(dirname)
	    except FileExistsError:
	        print('Directory '+dirname+' already exists, not creating...')

	    # Anchors carrying either class are treated as links to full-size images
	    # (presumably one class per site layout -- verify against the pages).
	    image_links = (soup.find_all("a", {"class": "fileThumb"})
	                   + soup.find_all("a", {"class": "thread_image_link"}))
	    for link in image_links:
	        href = link['href']
	        # Scheme-less hrefs ("//host/path") get an explicit scheme. Testing
	        # the prefix, rather than searching the whole string, keeps a path
	        # that merely contains "http" from being mistaken for a full URL.
	        if href.startswith('http'):
	            url = href
	        else:
	            url = 'http:'+href
	        filename = url.rsplit('/', 1)[-1]
	        target = dirname+'/'+filename
	        if not os.path.isfile(target):
	            print('Downloading '+filename+'...')
	            urllib.request.urlretrieve(url, target)
	        else:
	            print('Skipping '+filename+', because it already exists.')

	    found = False
	    quote_links = (soup.find_all("span", {"class": "deadlink"})
	                   + soup.find_all("a", {"class": "quotelink"})
	                   + soup.find_all("a", {"class": "backlink"}))
	    for link in quote_links:
	        # Only plain post references of the form ">>12345" are considered.
	        # Anything else (for example a cross-board ">>>/g/12345" link) is
	        # skipped instead of crashing int() with a ValueError.
	        quote = re.match(r'>>(\d+)', link.text.strip())
	        if quote is None:
	            continue
	        post_number = quote.group(1)

	        # A referenced post numbered below the current thread is taken to
	        # be the previous thread.
	        if int(post_number) < int(dirname):
	            print('Found previous thread, following the link')
	            found = True
	            # Cut the URL at the start of the thread number so that any
	            # suffix after it is not glued onto the new number.
	            arg = arg[:thread_match.start()]+post_number
	            break

	    if not found:
	        print('Previous thread not found. Exitting..')
	        break