Created March 14, 2020 at 18:32
-
-
Save ty-porter/739b583118e3ff2988e68f158b2487a0 to your computer and use it in GitHub Desktop.
Reddit bot that generates a CSV report of stale (inactive) subreddit moderators.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import csv | |
import datetime | |
import praw | |
import requests | |
import traceback | |
import sys | |
# Reddit API credentials -- replace these placeholders before running.
BOT_USERNAME = 'BOT_USERNAME'
BOT_PASSWORD = 'BOT_PASSWORD'
BOT_CLIENT_ID = 'BOT_CLIENT_ID'
BOT_CLIENT_SECRET = 'BOT_CLIENT_SECRET'
BOT_USER_AGENT = 'BOT_USER_AGENT'

# Paginated redditlist.com listing; the page number is appended to this URL.
REDDITLIST_URL = 'http://redditlist.com/all?page='

# Crawl tuning -- edit as needed to keep the run small enough to avoid timeouts.
HI_LIMIT = 15000        # upper bound on subscriber count
LO_LIMIT = 10000        # lower bound on subscriber count
START_PAGE = 35         # first redditlist.com page to scrape
ACTIVITY_LIMIT = 60     # days without posting before a moderator counts as stale
class Bot():
    """Crawl redditlist.com for subreddits in a subscriber range and write a
    CSV report of each subreddit's active vs. inactive moderators.
    """

    def __init__(self):
        """Authenticate against Reddit and compute the activity cutoff."""
        self.reddit = praw.Reddit(username=BOT_USERNAME,
                                  password=BOT_PASSWORD,
                                  client_id=BOT_CLIENT_ID,
                                  client_secret=BOT_CLIENT_SECRET,
                                  user_agent=BOT_USER_AGENT)
        self.subreddits = []
        # Epoch seconds ACTIVITY_LIMIT days in the past. Use an *aware* UTC
        # datetime: utcnow().timestamp() treats the naive value as local time,
        # skewing the cutoff by the machine's UTC offset.
        self.date_cutoff = (datetime.datetime.now(datetime.timezone.utc).timestamp()
                            - (ACTIVITY_LIMIT * 24 * 60 * 60))

    def scrape_subreddits(self):
        """Populate self.subreddits with {'name', 'subscribers'} dicts for
        every subreddit whose subscriber count is in [LO_LIMIT, HI_LIMIT],
        paging through redditlist.com starting at START_PAGE.
        """
        print('Pulling data from redditlist.com...\n')
        page = START_PAGE
        low_subscriber_count = HI_LIMIT
        # redditlist.com orders by subscribers descending, so stop paging once
        # the smallest count seen on a page drops below LO_LIMIT.
        while low_subscriber_count >= LO_LIMIT:
            response = requests.get(REDDITLIST_URL + str(page))
            # 'lxml' is the bs4 feature name for the lxml HTML parser;
            # 'lxml-html' is not registered and raises FeatureNotFound.
            parser = BeautifulSoup(response.content, 'lxml')
            # Index 1 selects the second listing column (ranked by subscribers).
            listings = parser.find_all('div', class_='span4 listing')[1].find_all('div', class_='listing-item')
            if not listings:
                # Empty page (past the end of the site): bail out rather than
                # loop forever, since low_subscriber_count can no longer drop.
                break
            for subreddit in listings:
                name = subreddit.find('span', class_='subreddit-url').a.text
                subscriber_string = subreddit.find('span', class_='listing-stat').text
                subscribers = self.subscribers_to_int(subscriber_string)
                if subscribers < low_subscriber_count:
                    low_subscriber_count = subscribers
                if LO_LIMIT <= subscribers <= HI_LIMIT:
                    self.subreddits.append({
                        'name': name,
                        'subscribers': subscribers
                    })
            page += 1

    def subscribers_to_int(self, subscribers):
        """Convert a comma-grouped count string like '12,345' to the int 12345."""
        return int(subscribers.replace(',', ''))

    def botcode(self):
        """Entry point: scrape the subreddit list, then write one CSV row of
        moderator-activity stats per subreddit found.
        """
        self.scrape_subreddits()
        if len(self.subreddits) > 50:
            print('WARNING! You are attempting to crawl through {} subreddits.'.format(len(self.subreddits)),
                  'Recommended limit for this is 50 subreddits.',
                  'Consider a tighter search to limit API load.\n',
                  sep='\n')
        print('Searching for subreddit moderators...\n')
        filename = 'stalemods-{}.csv'.format(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
        with open(filename, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['name', 'subscribers', 'total_mods', 'active_mods', 'inactive_mods', 'subreddit_type'])
            for subreddit in self.subreddits:
                # Look the praw Subreddit up once and reuse it; fetching it
                # separately for the moderator list and over18 doubles traffic.
                praw_subreddit = self.reddit.subreddit(subreddit['name'])
                total_mods = 0
                active_mods = 0
                inactive_mods = 0
                for moderator in praw_subreddit.moderator():
                    total_mods += 1
                    if self.is_active(moderator):
                        active_mods += 1
                    else:
                        inactive_mods += 1
                subreddit_type = 'nsfw' if praw_subreddit.over18 else 'sfw'
                writer.writerow([subreddit['name'], subreddit['subscribers'],
                                 total_mods, active_mods, inactive_mods, subreddit_type])
        print('Finished! Check {} for results.'.format(filename))

    def is_active(self, moderator):
        """Return True if the moderator's newest comment or submission is more
        recent than self.date_cutoff, else False.
        """
        redditor = self.reddit.redditor(moderator.name)
        # limit=1 yields at most the single newest item; next(..., None)
        # replaces the original one-iteration loops with redundant breaks.
        newest_comment = next(iter(redditor.comments.new(limit=1)), None)
        if newest_comment is not None and newest_comment.created_utc > self.date_cutoff:
            return True
        newest_submission = next(iter(redditor.submissions.new(limit=1)), None)
        return (newest_submission is not None
                and newest_submission.created_utc > self.date_cutoff)
def _main():
    """Run the bot: exit quietly on Ctrl-C, print a traceback on any
    other failure so long crawls remain diagnosable.
    """
    try:
        Bot().botcode()
    except KeyboardInterrupt:
        sys.exit(0)
    except Exception:
        traceback.print_exc()


if __name__ == '__main__':
    _main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment