Skip to content

Instantly share code, notes, and snippets.

@nobane
Last active April 19, 2019 01:30
Show Gist options
  • Save nobane/19291010152fca0182549569c6164193 to your computer and use it in GitHub Desktop.
Save nobane/19291010152fca0182549569c6164193 to your computer and use it in GitHub Desktop.
Automate running queries on SEDE because #aintnobodygottimeforthat
import time
import os
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import config
# User IDs to run the SEDE query for; one CSV download is triggered per ID.
target_user_ids = ['438992', '3043', '2141635', '6622587']
# Retry budget consulted by scrape_user when the result grid never appears.
MAX_RETRY = 10

# Firefox profile that saves CSV downloads without showing any dialog.
fp = webdriver.FirefoxProfile()
# NOTE(review): per Firefox pref docs, folderList=2 means "use a custom
# directory"; the directory itself is set per user inside scrape_user.
fp.set_preference("browser.download.folderList", 2)
fp.set_preference("browser.download.manager.showWhenStarting", False)
fp.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv")
# NOTE(review): the firefox_profile keyword is deprecated in Selenium 4;
# migrate to firefox.options.Options when upgrading — confirm target version.
driver = webdriver.Firefox(firefox_profile=fp)

# Log in to the Stack Exchange OpenID provider with the credentials from config.
# find_element(By.…) is used instead of find_element_by_*, which newer
# Selenium releases no longer provide.
driver.get('https://openid.stackexchange.com/account/login')
elem = driver.find_element(By.ID, 'email')
elem.send_keys(config.USERNAME)
elem = driver.find_element(By.ID, 'password')
elem.send_keys(config.PASSWORD)
elem.send_keys(Keys.RETURN)
# Presumably gives the login submission a moment to complete before
# navigating away — TODO replace with an explicit wait on a post-login element.
time.sleep(1)

# Sign in to the Data Explorer by clicking its second "preferred login" entry.
driver.get('https://data.stackexchange.com/account/login')
elem = driver.find_element(By.CSS_SELECTOR, '.preferred-login:nth-child(2)')
elem.click()
def scrape_user(user_id, retry=0):
    """Run the saved SEDE query for one user and trigger its CSV download.

    Loads the query page, enters ``user_id`` into the ``dynParam0`` field,
    submits it and waits up to 180 seconds for the result grid
    (``slick-header``) to appear. On success the Firefox download directory
    is pointed at ``<cwd>/<user_id>`` and the ``resultSetsButton`` element
    is clicked.

    Args:
        user_id: User ID typed into the query parameter; also used as the
            name of the download sub-directory.
        retry: Number of retries already made for this user. Callers
            normally leave this at 0; it is incremented on each retry.

    Returns:
        None. If the results never appear after ``MAX_RETRY`` retries the
        function prints a message and gives up without downloading.
    """
    # Imported locally so the fix does not depend on a new file-level import.
    from selenium.common.exceptions import TimeoutException

    driver.get("https://data.stackexchange.com/stackoverflow/revision/1028325/1270163/get-activity-timestamps-for-a-user")
    elem = driver.find_element(By.ID, "dynParam0")
    elem.clear()
    elem.send_keys(user_id)
    elem.send_keys(Keys.RETURN)

    try:
        WebDriverWait(driver, 180).until(
            EC.presence_of_element_located((By.CLASS_NAME, "slick-header"))
        )
    except TimeoutException:
        # Only a timeout means "results never showed up"; anything else
        # (including Ctrl-C) should propagate instead of being retried.
        if retry >= MAX_RETRY:
            print('Scrape for %s failed after %s retries, giving up' % (user_id, retry))
            return
        # Never found the results, recurse and try again
        print('Scrape for %s failed, trying again @ %s' % (user_id, datetime.now()))
        scrape_user(user_id, retry + 1)
        # The retry handled (or abandoned) the download; do not fall through
        # and repeat the download step against this stale page.
        return

    download_dir = os.path.join(os.getcwd(), str(user_id))

    # Register the Mozilla-specific context endpoint and switch to the
    # "chrome" context, where the script below can reach Services.prefs.
    driver.command_executor._commands["SET_CONTEXT"] = ("POST", "/session/$sessionId/moz/context")
    driver.execute("SET_CONTEXT", {"context": "chrome"})
    try:
        driver.execute_script("""
            Services.prefs.setBoolPref('browser.download.useDownloadDir', true);
            Services.prefs.setStringPref('browser.download.dir', arguments[0]);
        """, download_dir)
    finally:
        # Always return to the page context, otherwise later element
        # lookups would run against the browser chrome.
        driver.execute("SET_CONTEXT", {"context": "content"})

    elem = driver.find_element(By.ID, 'resultSetsButton')
    elem.click()
# Scrape every configured user, then shut the browser down even if one of
# the scrapes raises, so no Firefox session is left behind.
try:
    for user_id in target_user_ids:
        scrape_user(user_id)
    print('Scraping complete :-)')
finally:
    # NOTE(review): the last scrape_user call ends by clicking the download
    # button; confirm that download has finished before the browser exits.
    # quit() ends the whole WebDriver session; close() only closes the window.
    driver.quit()
@nobane
Copy link
Author

nobane commented Apr 15, 2019

This bot runs on Selenium and takes an arbitrary number of user IDs, plugging each one into this query. The resulting QueryResults.csv file is saved into a subfolder named after the user ID being queried.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment