Last active
July 3, 2018 01:13
-
-
Save alialavia/f6718955b011cc94dddeea45b1a7e4a0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
To evaluate the good or bad score of a tweet, we average the weights of the
words in it.
Each word is stemmed and looked up in a table of word weights; a word that is
not in the table adds nothing to the total but still counts as a word.
If the average weight is greater than zero then it's a good tweet, otherwise
it's a bad tweet.
""" | |
import json | |
import nltk | |
from nltk.stem.porter import * | |
# Shared Porter stemmer, created once at import time; get_average_word_weight
# uses it to reduce each word to its stem before the weight lookup.
stemmer = PorterStemmer()
def get_words(str):
    """Split a string into a list of tokens using NLTK's word tokenizer."""
    # NOTE: the parameter name shadows the builtin ``str``; it is kept
    # unchanged so that existing keyword callers continue to work.
    tokens = nltk.word_tokenize(str)
    return tokens
def load_json(json_file):
    """Read the file at ``json_file`` and return its parsed JSON content.

    The file is opened explicitly as UTF-8 so that non-ASCII keys and values
    decode the same way on every platform instead of depending on the
    locale's default encoding.

    Raises whatever ``open`` raises for an unreadable path, and
    ``json.JSONDecodeError`` if the content is not valid JSON.
    """
    with open(json_file, encoding="utf-8") as f:
        return json.load(f)
def get_average_word_weight(list_of_words, word_weights):
    """Return the mean weight per word of ``list_of_words``.

    Each word is stemmed with the module-level ``stemmer`` and the stem is
    looked up in ``word_weights``. Stems missing from the table add nothing
    to the total, but the word still counts towards the divisor. An empty
    list yields ``0.0``.
    """
    word_count = len(list_of_words)
    if word_count == 0:
        # Nothing to average; this also avoids dividing by zero below.
        return 0.0

    total_weight = 0.0
    for word in list_of_words:
        stem = stemmer.stem(word)
        if stem not in word_weights:
            # Unweighted stem: contributes nothing to the total.
            continue
        total_weight += word_weights[stem]

    return total_weight / word_count
def anaylse_tweet(tweet_string, word_weights):
    """Print a tweet's average word weight followed by a one-line verdict.

    The tweet is tokenized with ``get_words`` and scored with
    ``get_average_word_weight``. A score strictly greater than zero prints
    the positive verdict; anything else prints the negative one. Nothing is
    returned.
    """
    score = get_average_word_weight(get_words(tweet_string), word_weights)
    print("The weight of the tweet is " + str(score))

    verdict = (
        "What a presidential thing to say! HUGE!"
        if score > 0
        else "Surely you're joking, Mr. Trump! SAD!"
    )
    print(verdict)
# Example input: the tweet to be scored.
tweet_string = "Thanks to the historic TAX CUTS that I signed into law, your paychecks are going way UP, your taxes are going way DOWN, and America is once again OPEN FOR BUSINESS!"
# Table mapping word stems to weights, read from the current working directory.
word_weights = load_json("word_weights.json")
# Score the tweet defined above and print the result.
anaylse_tweet(tweet_string, word_weights)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment