Skip to content

Instantly share code, notes, and snippets.

@tobyspark
Last active September 12, 2016 14:57
Show Gist options
  • Save tobyspark/654528d9e3ae023a746a to your computer and use it in GitHub Desktop.
Save tobyspark/654528d9e3ae023a746a to your computer and use it in GitHub Desktop.
South London Makerspace - Discourse media downloader
#! /usr/bin/env python3
import sys
from urllib.request import Request, urlopen
from urllib.parse import urlencode
import json
import os.path
# The script takes exactly one command-line argument: the Discourse API key.
if len(sys.argv) != 2:
    sys.exit('API key missing')

# Endpoint that runs saved query 14 of the forum's admin "explorer" plugin.
SLMS_DISCOURSE_QUERY_URL = 'https://discourse.southlondonmakerspace.org/admin/plugins/explorer/queries/14/run'
# Form fields sent with the query request to authenticate it.
SLMS_DISCOURSE_VALUES = {'api_key': sys.argv[1], 'api_username': 'makerspace'}
def file_safe_char(x: str) -> str:
    """Map a single character to its file-name-safe replacement.

    Alphanumeric characters are returned unchanged, a space or hyphen
    becomes an underscore, and anything else is dropped (empty string).
    """
    if x.isalnum():
        return x
    if x in (' ', '-'):
        return '_'
    return ''
# Send the credentials as a form-encoded request body (supplying `data` makes
# urllib issue a POST) and parse the JSON document the server returns.
data = urlencode(SLMS_DISCOURSE_VALUES)
request = Request(
    SLMS_DISCOURSE_QUERY_URL,
    data=data.encode('utf-8'),
    headers={
        'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
        'Accept': '*/*',
    },
)
with urlopen(request) as api_response:
    result = api_response.read().decode('utf-8')
# Parse after the connection has been closed; `result` becomes the decoded JSON.
result = json.loads(result)
# print(result.keys())
# ['success', 'params', 'relations', 'duration', 'colrender', 'rows', 'errors', 'columns']
# print(result['columns'])
# ['post', 'username', 'image', 'date']
# for row in result['rows']:
# print(row)
# ['<a href = "https://discourse.southlondonmakerspace.org/p/25753">Networking work', 'jonathanjo', 't', 'Systems', 'https://discourse.southlondonmakerspace.org/uploads/default/original/2X/9/965179f59b9957970fa5219ebbd30d62a1b11d20.jpg', '2016-07-22']
# print('{} | {} | {} | {}'.format(row[0], row[1], row[2], row[3]))
# https://discourse.southlondonmakerspace.org/p/8210 | unknowndomain | https://discourse.southlondonmakerspace.org/uploads/default/original/1X/b3da2864a8dd491f364a2317a6a4e2720a904652.JPG | 2016-01-11
# Positions of the columns within each row returned by the query.
thread_index = 0
user_index = 1
restricted_index = 2
category_index = 3
image_url_index = 4
date_index = 5

# Download every image listed in the query result into the current directory,
# naming each file after its date, uploader and thread title.
for row in result['rows']:
    # The thread cell is an HTML anchor fragment of the form
    # '<a href = "URL">Title'. Split on the first '>' only, so that a '>'
    # appearing inside the title does not cut the title short.
    thread_title = row[thread_index].split('>', 1)[1]
    image_url = row[image_url_index]

    safe_user = ''.join(file_safe_char(x) for x in row[user_index])
    safe_title = ''.join(file_safe_char(x) for x in thread_title)
    file_name = '{}-{}---{}'.format(row[date_index], safe_user, safe_title)
    if row[restricted_index] == 't':
        file_name = file_name + '--RESTRICTED'
    # Keep the extension of the uploaded file as it appears in its URL.
    file_ext = os.path.splitext(image_url)[1]

    # Avoid overwriting an existing file: append -01, -02, ... until the
    # resulting name is unused.
    counter = 1
    candidate = file_name
    while os.path.exists(candidate + file_ext):
        candidate = '{}-{:02}'.format(file_name, counter)
        counter += 1
    file_name = candidate

    print('Downloading {} as {}'.format(image_url, file_name + file_ext))
    request = Request(image_url)
    # Fetch the whole image before creating the output file, so a failed
    # download does not leave an empty file behind.
    with urlopen(request) as image_response:
        image_data = image_response.read()
    with open(file_name + file_ext, 'wb') as image_file:
        image_file.write(image_data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment