Created
August 9, 2017 15:12
-
-
Save ljvmiranda921/3fcbed4d69f4aced752304fa8f36353b to your computer and use it in GitHub Desktop.
Full code for Twitter scraping using tweepy. Accompanying blog post: https://ljvmiranda921.github.io/notebook/2017/02/24/twitter-streaming-using-python/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Full code for scraper | |
Code used in the Twitter Streaming with Python tutorial | |
Author: Lester James V. Miranda | |
Blog post: https://ljvmiranda921.github.io/notebook/2017/02/24/twitter-streaming-using-python/ | |
""" | |
from __future__ import absolute_import, print_function | |
# Import modules | |
from tweepy.streaming import StreamListener | |
from tweepy import OAuthHandler | |
from tweepy import Stream | |
import dataset | |
from sqlalchemy.exc import ProgrammingError | |
# Twitter API credentials: replace each placeholder below with the value
# issued for your own Twitter application before running the script.
consumer_key = " "
consumer_secret = " "
access_token = " "
access_token_secret = " "
class StdOutListener(StreamListener):
    """Stream listener that echoes each tweet and stores it in the database.

    The text of every received status is printed to stdout. Statuses that
    are not retweets are then inserted as one row into the ``myTable``
    table of the module-level ``db`` connection.
    """

    def on_status(self, status):
        """Print the tweet text and save the tweet unless it is a retweet.

        Args:
            status: Status object delivered by the stream.

        Returns:
            None. A ``ProgrammingError`` raised by the insert is printed
            rather than propagated.
        """
        print(status.text)

        # ``retweeted`` only reports whether the authenticating user has
        # retweeted the tweet, so on its own it does not filter retweets
        # coming from the stream. A retweet carries the original tweet in
        # ``retweeted_status``, so its presence is checked as well.
        if status.retweeted or hasattr(status, 'retweeted_status'):
            return

        user = status.user
        row = dict(
            id_str=status.id_str,
            created=status.created_at,
            text=status.text,
            fav_count=status.favorite_count,
            user_name=user.screen_name,
            user_description=user.description,
            user_location=user.location,
            user_created=user.created_at,
            user_followers=user.followers_count,
        )

        # ``db`` is the module-level connection created in the
        # ``__main__`` section of this script.
        table = db['myTable']
        try:
            table.insert(row)
        except ProgrammingError as err:
            print(err)

    def on_error(self, status_code):
        """Handle an error status code reported by the stream.

        Args:
            status_code: Status code passed in by the stream.

        Returns:
            False when ``status_code`` is 420, otherwise None.
        """
        # NOTE(review): per the tweepy streaming docs, returning False from
        # on_error disconnects the stream; 420 is presumably Twitter's
        # rate-limit response -- confirm against the API documentation.
        if status_code == 420:
            return False
if __name__ == '__main__':
    # ``db`` is intentionally a module-level name: StdOutListener.on_status
    # looks it up when storing each tweet.
    db = dataset.connect("sqlite:///tweets.db")

    # Authenticate with the credentials defined at the top of the file.
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)

    # Attach the listener and stream tweets matching the tracked keywords.
    listener = StdOutListener()
    stream = Stream(auth, listener)
    stream.filter(track=['github', 'tweepy'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment