Last active
August 31, 2016 06:40
-
-
Save durcana/f6378d6158b1a7167bd6 to your computer and use it in GitHub Desktop.
The main script, feed.py, streams live tweets and keeps those that contain the word "birth" or "conviction". It prints each matching tweet along with its sentiment label, as evaluated by naive_bayes.py. I'm looking to add a script that will use all the collected tweets to compute the average sentiment for the words "birth" and "conviction".
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from json import loads | |
from tweepy import OAuthHandler | |
import os | |
from tweepy import Stream | |
from tweepy.streaming import StreamListener | |
import naive_bayes | |
# Twitter API credentials, looked up in the process environment.
# Each name is bound to None when its variable is not set.
ckey, csecret, atoken, asecret = (
    os.environ.get(variable)
    for variable in ('CKEY', 'CSECRET', 'ATOKEN', 'ASECRET')
)
class Listener(StreamListener):
    """Stream listener that prints keyword tweets with their sentiment."""

    # Words a tweet's text must contain (case-insensitively) to be printed.
    KEYWORDS = ('conviction', 'birth')

    def on_data(self, data):
        """Handle one raw JSON message received from the stream.

        Prints the tweet text followed by the label returned by
        ``naive_bayes.sentiment`` when the text contains one of KEYWORDS.

        Args:
            data: JSON-encoded message string.

        Returns:
            True, always, so the stream keeps running.
        """
        json_data = loads(data)
        # Not every stream message is a tweet: delete and limit notices have
        # no 'text' key, and indexing it directly would raise KeyError.
        tweet = json_data.get('text') if isinstance(json_data, dict) else None
        if tweet is None:
            return True
        # Compare in lower case so "Birth" / "CONVICTION" are not dropped.
        lowered = tweet.lower()
        if any(keyword in lowered for keyword in self.KEYWORDS):
            print(tweet)
            print(naive_bayes.sentiment(tweet))
        return True

    def on_error(self, status):
        """Print the error status reported by the stream.

        Args:
            status: HTTP status code of the failed connection.

        Returns:
            False for status 420 (rate limited) so the stream disconnects
            instead of retrying; None otherwise, as before.
        """
        print(status)
        if status == 420:
            return False
def main():
    """Authenticate with Twitter and stream English tweets for the keywords.

    Raises:
        SystemExit: if any of the CKEY, CSECRET, ATOKEN or ASECRET
            environment variables is unset or empty.
    """
    credentials = {
        'CKEY': ckey,
        'CSECRET': csecret,
        'ATOKEN': atoken,
        'ASECRET': asecret,
    }
    # Fail fast with a clear message instead of handing None to tweepy and
    # getting an obscure authentication error later.
    missing = sorted(name for name, value in credentials.items() if not value)
    if missing:
        raise SystemExit(
            'Missing Twitter credentials; set environment variable(s): '
            + ', '.join(missing)
        )

    auth = OAuthHandler(ckey, csecret)
    auth.set_access_token(atoken, asecret)
    twitter_stream = Stream(auth, Listener())
    twitter_stream.filter(track=['conviction', 'birth'], languages=['en'])


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk | |
import random | |
from nltk.corpus import twitter_samples | |
# Cache for the (tweets, word_features) pair returned by word_delegation();
# find_features() fills it on first use so the corpus is not re-read per call.
DELEGATION_RESULTS = None


def word_delegation(feature_count=1000):
    """Load the labelled NLTK twitter samples and choose the vocabulary.

    Args:
        feature_count: how many of the most frequent lower-cased tokens to
            keep as features (default 1000).

    Returns:
        A ``(tweets, word_features)`` tuple. ``tweets`` is a list of
        ``(tokens, label)`` pairs with label ``'pos'`` or ``'neg'``;
        ``word_features`` is the list of the ``feature_count`` most frequent
        lower-cased tokens across both files.
    """
    labelled_files = (
        ('positive_tweets.json', 'pos'),
        ('negative_tweets.json', 'neg'),
    )
    tweets, all_words = [], []
    for fileid, label in labelled_files:
        for tweet in twitter_samples.tokenized(fileid):
            tweets.append((tweet, label))
            all_words.extend(word.lower() for word in tweet)

    # FreqDist.keys() is not ordered by frequency, so slicing it picks
    # arbitrary words. most_common() ranks by count, which is the intent.
    frequencies = nltk.FreqDist(all_words)
    word_features = [word for word, _ in frequencies.most_common(feature_count)]
    return tweets, word_features
def find_features(tweet):
    """Build the word-presence feature dict for one tokenized tweet.

    The vocabulary comes from word_delegation() and is cached in the
    module-level DELEGATION_RESULTS after the first call.

    Args:
        tweet: iterable of token strings. A plain string is iterated
            character by character, so callers should tokenize first.

    Returns:
        dict mapping every vocabulary word to True if it occurs in the tweet
        (compared in lower case) and False otherwise.
    """
    global DELEGATION_RESULTS
    if DELEGATION_RESULTS is None:
        DELEGATION_RESULTS = word_delegation()
    _, word_features = DELEGATION_RESULTS
    # The vocabulary is stored lower-cased, so the tweet's tokens must be
    # lower-cased too or capitalised words would never match a feature.
    words = {token.lower() for token in tweet}
    return {w: w in words for w in word_features}
def bayes(count):
    """Train a Naive Bayes classifier on a random sample of labelled tweets.

    Args:
        count: number of shuffled feature sets to train on.

    Returns:
        The trained ``nltk.NaiveBayesClassifier``.
    """
    # Reuse the cached corpus that find_features() also reads, rather than
    # loading and tokenizing both sample files a second time.
    global DELEGATION_RESULTS
    if DELEGATION_RESULTS is None:
        DELEGATION_RESULTS = word_delegation()
    tweets, _ = DELEGATION_RESULTS

    feature_sets = [(find_features(tokens), label) for tokens, label in tweets]
    # Shuffle so the training slice mixes both labels; the corpus list holds
    # all positive tweets first, then all negative ones.
    random.shuffle(feature_sets)
    return nltk.NaiveBayesClassifier.train(feature_sets[:count])
def sentiment(text):
    """Classify a tweet with the Naive Bayes model.

    The classifier is trained once, on the first call, and then reused.
    Retraining per call is slow and, because training shuffles randomly,
    could label the same text differently between calls.

    Args:
        text: raw tweet string, or an already tokenized sequence of words.

    Returns:
        The label predicted by the classifier ('pos' or 'neg', the labels
        assigned in word_delegation()).
    """
    classifier = getattr(sentiment, '_classifier', None)
    if classifier is None:
        classifier = sentiment._classifier = bayes(count=1900)

    if isinstance(text, str):
        # find_features() expects word tokens; handing it a raw string would
        # yield a set of single characters. TweetTokenizer is presumably the
        # tokenizer behind twitter_samples.tokenized() - confirm in NLTK docs.
        from nltk.tokenize import TweetTokenizer
        tokens = TweetTokenizer().tokenize(text)
    else:
        tokens = text
    return classifier.classify(find_features(tokens))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment