tobyspark · September 12, 2016 14:57
diff --git a/slms-discourse-media-downloader.py b/slms-discourse-media-downloader.py
 #! /usr/bin/env python3

 import sys
 from urllib.request import Request, urlopen
 from urllib.parse import urlencode
 import json
 import os.path

 # Exactly one command-line argument is expected: the Discourse API key.
 if len(sys.argv) != 2:
     sys.exit('API key missing')

 # Endpoint that runs saved query 14 of the forum's admin "explorer" plugin.
 SLMS_DISCOURSE_QUERY_URL = 'https://discourse.southlondonmakerspace.org/admin/plugins/explorer/queries/14/run'
 # Credentials that are form-encoded into the body of the query request.
 SLMS_DISCOURSE_VALUES = dict(api_key=sys.argv[1], api_username='makerspace')

 def file_safe_char(x):
     """Return the file-name-safe replacement for the single character x.

     A space or hyphen becomes an underscore, an alphanumeric character is
     returned unchanged, and anything else is dropped (empty string).
     """
     # Neither ' ' nor '-' is alphanumeric, so testing them first is safe.
     if x in (' ', '-'):
         return '_'
     return x if x.isalnum() else ''

 # Form-encode the credentials. Because the Request carries a data payload,
 # urllib sends it as a POST.
 data = urlencode(SLMS_DISCOURSE_VALUES)
 request_headers = {
     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
     'Accept': '*/*',
 }
 request = Request(
     SLMS_DISCOURSE_QUERY_URL,
     data=data.encode('utf-8'),
     headers=request_headers,
 )

 # Run the query and parse its JSON reply. The response is closed as soon as
 # its body has been read, so the API connection is not held open while the
 # images are being downloaded.
 with urlopen(request) as api_response:
     result = json.loads(api_response.read().decode('utf-8'))

 # Column positions within each entry of result['rows']. Example row:
 # ['<a href = "https://discourse.southlondonmakerspace.org/p/25753">Networking work', 'jonathanjo', 't', 'Systems', 'https://discourse.southlondonmakerspace.org/uploads/default/original/2X/9/965179f59b9957970fa5219ebbd30d62a1b11d20.jpg', '2016-07-22']
 thread_index = 0
 user_index = 1
 restricted_index = 2
 category_index = 3
 image_url_index = 4
 date_index = 5

 for row in result['rows']:
     # The thread column is an HTML anchor of the form '<a href = "URL">TITLE'.
     # Split on the first '>' only, so a title that itself contains '>' is
     # kept whole instead of being cut at that character.
     thread_title = row[thread_index].split('>', 1)[1]

     # Base name: DATE-USER---TITLE, with user and title reduced to
     # file-name-safe characters.
     file_name = '{}-{}---{}'.format(
         row[date_index],
         ''.join(file_safe_char(x) for x in row[user_index]),
         ''.join(file_safe_char(x) for x in thread_title))
     if row[restricted_index] == 't':
         file_name = file_name + '--RESTRICTED'
     file_ext = os.path.splitext(row[image_url_index])[1]

     # Several images can share the same date, user and thread: append
     # -01, -02, ... until the name does not clash with an existing file.
     counter = 1
     candidate = file_name
     while os.path.exists(candidate + file_ext):
         candidate = '{}-{:02}'.format(file_name, counter)
         counter += 1
     file_name = candidate

     print('Downloading {} as {}'.format(row[image_url_index], file_name + file_ext))

     # Read the whole image before creating the file, so a failed download
     # does not leave an empty file behind that would shift later counters.
     request = Request(row[image_url_index])
     with urlopen(request) as image_response:
         image_data = image_response.read()
     with open(file_name + file_ext, 'wb') as image_file:
         image_file.write(image_data)
	#! /usr/bin/env python3

	import sys
	from urllib.request import Request, urlopen
	from urllib.parse import urlencode
	import json
	import os.path

	# Exactly one command-line argument is expected: the Discourse API key.
	if len(sys.argv) != 2:
	    sys.exit('API key missing')

	# Endpoint that runs saved query 14 of the forum's admin "explorer" plugin.
	SLMS_DISCOURSE_QUERY_URL = 'https://discourse.southlondonmakerspace.org/admin/plugins/explorer/queries/14/run'
	# Credentials that are form-encoded into the body of the query request.
	SLMS_DISCOURSE_VALUES = { 'api_key': sys.argv[1], 'api_username': 'makerspace'}

	def file_safe_char(x):
	    """Return the file-name-safe replacement for the single character x.

	    An alphanumeric character is returned unchanged, a space or hyphen
	    becomes an underscore, and anything else is dropped (empty string).
	    """
	    if x.isalnum():
	        return x
	    if x in (' ', '-'):
	        return '_'
	    return ''

	# Form-encode the credentials. Because the Request carries a data payload,
	# urllib sends it as a POST.
	data = urlencode(SLMS_DISCOURSE_VALUES)
	request = Request(
	    SLMS_DISCOURSE_QUERY_URL,
	    data=data.encode('utf-8'),
	    headers={
	        'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
	        # '*/*' (accept any media type); a bare '/' is not a valid media range.
	        'Accept': '*/*',
	    },
	)

	# Run the query and parse its JSON reply. The response is closed as soon as
	# its body has been read, so the API connection is not held open while the
	# images are being downloaded.
	with urlopen(request) as api_response:
	    result = json.loads(api_response.read().decode('utf-8'))

	# Column positions within each entry of result['rows']. Example row:
	# ['<a href = "https://discourse.southlondonmakerspace.org/p/25753">Networking work', 'jonathanjo', 't', 'Systems', 'https://discourse.southlondonmakerspace.org/uploads/default/original/2X/9/965179f59b9957970fa5219ebbd30d62a1b11d20.jpg', '2016-07-22']
	thread_index = 0
	user_index = 1
	restricted_index = 2
	category_index = 3
	image_url_index = 4
	date_index = 5

	for row in result['rows']:
	    # The thread column is an HTML anchor of the form '<a href = "URL">TITLE'.
	    # Split on the first '>' only, so a title that itself contains '>' is
	    # kept whole instead of being cut at that character.
	    thread_title = row[thread_index].split('>', 1)[1]

	    # Base name: DATE-USER---TITLE, with user and title reduced to
	    # file-name-safe characters.
	    file_name = '{}-{}---{}'.format(
	        row[date_index],
	        ''.join(file_safe_char(x) for x in row[user_index]),
	        ''.join(file_safe_char(x) for x in thread_title))
	    if row[restricted_index] == 't':
	        file_name = file_name + '--RESTRICTED'
	    file_ext = os.path.splitext(row[image_url_index])[1]

	    # Several images can share the same date, user and thread: append
	    # -01, -02, ... until the name does not clash with an existing file.
	    counter = 1
	    candidate = file_name
	    while os.path.exists(candidate + file_ext):
	        candidate = '{}-{:02}'.format(file_name, counter)
	        counter += 1
	    file_name = candidate

	    print('Downloading {} as {}'.format(row[image_url_index], file_name + file_ext))

	    # Read the whole image before creating the file, so a failed download
	    # does not leave an empty file behind that would shift later counters.
	    request = Request(row[image_url_index])
	    with urlopen(request) as image_response:
	        image_data = image_response.read()
	    with open(file_name + file_ext, 'wb') as image_file:
	        image_file.write(image_data)