Skip to content

Instantly share code, notes, and snippets.

@arafsheikh
Last active April 29, 2017 12:48
Show Gist options
  • Save arafsheikh/080c662f188fce5041faef8ba7c3ee9e to your computer and use it in GitHub Desktop.
Save arafsheikh/080c662f188fce5041faef8ba7c3ee9e to your computer and use it in GitHub Desktop.
Realtime Ideone scraper: Outputs all links in realtime which match the given STDIN
import os
import queue
import signal
import threading
import time
import urllib
import urllib.request

from bs4 import BeautifulSoup
# Listing page that fetchRecents() downloads to discover submission IDs.
URL = "http://ideone.com/recent"
# Once this many processed IDs are stored, cleanup() trims the list.
MAX_PROCESSED = 200
# Seconds between fetches of the recent-codes page, to reduce load.
DELAY = 20
class SetQueue(queue.Queue):
    """
    A queue with only unique items.

    Items are stored in a set, so putting an item that is already queued
    is a no-op and items come back out in arbitrary order.

    Storage is plugged in through the ``queue.Queue`` hook methods
    (``_init``, ``_qsize``, ``_put``, ``_get``).  Because the base-class
    constructor still runs, the inherited lock-protected ``put()``,
    ``get()``, ``qsize()`` and ``empty()`` work as usual.  ``_empty()`` and
    ``_len()`` are extra unlocked helpers kept for existing callers.
    """

    def _init(self, maxsize):
        """Create the backing set (called by ``queue.Queue.__init__``)."""
        self.queue = set()

    def _qsize(self):
        """Return the number of queued items (hook used by ``qsize()``)."""
        return len(self.queue)

    def _put(self, item):
        """Add *item*; a duplicate of a queued item is silently ignored."""
        self.queue.add(item)

    def _get(self):
        """Remove and return an arbitrary item; KeyError if empty."""
        return self.queue.pop()

    def _empty(self):
        """Return True when no items are queued (no locking)."""
        return not self.queue

    def _len(self):
        """Return the number of queued items (no locking)."""
        return len(self.queue)
def fetchRecents():
    """
    Queue the IDs found on the recent-codes page and schedule the next run.

    Downloads URL, looks at every ``div`` matched by the ``" header"``
    class filter, and queues the ID taken from its first link unless that
    ID is already in ``processed``.  Header divs without a link are
    skipped.

    Network errors from ``urlopen`` are not caught here.
    """
    # Schedule the next run before fetching, so an exception raised below
    # does not stop later runs.
    threading.Timer(DELAY, fetchRecents).start()

    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(URL) as response:
        soup = BeautifulSoup(response.read(), "lxml")

    for source in soup.find_all("div", " header"):
        link = source.find("a")
        if link is None:
            continue
        # Drop the first character of the link text (presumably a leading
        # "#" -- confirm against the page markup).
        _id = link.text[1:]
        if _id not in processed:
            q._put(_id)
def process(match_input):
    """
    Drain the queue, checking each submission's stdin against *match_input*.

    For every queued ID the submission page is downloaded and the text of
    the third ``div`` inside ``div#view_stdin`` is taken as its stdin.

    * If *match_input* is non-empty, the stdin is lower-cased, stripped of
      newlines and spaces, and a "Result found" line is printed on an
      exact match.
    * If *match_input* is empty (debug mode), the raw stdin is printed
      followed by a separator line.

    Pages without the expected stdin markup are skipped.  Every ID taken
    from the queue is appended to ``processed``.  The "Done processing"
    message is printed only when at least one ID was handled, so calling
    this repeatedly on an empty queue stays quiet.

    Network errors from ``urlopen`` are not caught here.
    """
    handled = 0
    while not q._empty():
        _id = q._get()
        page_url = "http://ideone.com/" + _id

        # The context manager closes the HTTP response once it is read.
        with urllib.request.urlopen(page_url) as response:
            soup = BeautifulSoup(response.read(), "lxml")

        stdin_box = soup.find("div", id="view_stdin")
        inner_divs = stdin_box.find_all("div") if stdin_box is not None else []

        # Only look at the stdin when the page has the expected layout.
        if len(inner_divs) >= 3:
            stdin = inner_divs[2].text
            if match_input:
                stdin = stdin.lower().replace("\n", "").replace(" ", "")
                if stdin == match_input:
                    print("Result found at: " + page_url)
            else:
                print(stdin)
                print("-" * 30)

        processed.append(_id)
        handled += 1

    if handled:
        print("Done processing the queue, waiting for the next batch of IDs")
def cleanup():
    """
    Trim ``processed`` once it holds MAX_PROCESSED or more IDs.

    Only the 50 most recently appended IDs are kept, so that IDs still
    shown on the recent page are not mistaken for unprocessed ones.  The
    list is modified in place.
    """
    if len(processed) < MAX_PROCESSED:
        return
    # Replace the contents in place with the last 50 entries.
    processed[:] = processed[-50:]
def print_stats_and_exit(signal, frame):
    """
    Signal handler: report progress counters and terminate the process.

    Prints how many IDs are in ``processed`` and how many are still
    queued, then calls ``os._exit(0)``.  Both arguments are unused.
    """
    done_count = len(processed)
    print("\nTotal records processed: ", done_count)
    pending_count = q._len()
    print("Records yet to process: ", pending_count)
    os._exit(0)
# On SIGINT, print stats and exit.
signal.signal(signal.SIGINT, print_stats_and_exit)
# Unique IDs waiting to be handled: filled by fetchRecents(), drained by
# process().
q = SetQueue()
# IDs that process() has already handled; trimmed by cleanup().
processed = []
if __name__ == "__main__":
    print("Enter stdin to match (Ctrl-Y to end input): ")
    # Read lines until one consisting solely of Ctrl-Y ("\x19") is entered.
    # End-of-file on stdin is treated the same way instead of crashing
    # with EOFError.
    entered = []
    try:
        for line in iter(input, "\x19"):
            entered.append(line)
    except EOFError:
        pass
    match_input = "\n".join(entered)
    # Normalise the same way process() normalises each submission's stdin.
    match_input = match_input.lower().replace("\n", "").replace(" ", "")
    if not match_input:
        print("No input to match. Program will run in debug mode.\n")

    # Starts the periodic fetch; it re-schedules itself every DELAY seconds.
    fetchRecents()
    while True:
        process(match_input)
        cleanup()
        # Pause between polls so an empty queue does not spin the CPU.
        time.sleep(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment