# Created November 26, 2014 00:45
# Benchmarking sentiment scoring algorithms for Twitter using precision, recall, F-measure
# Short scripts for testing three different sentiment classifiers on tweets,
# acquiring the tweets used for testing, and
# calculating the systems' precision, recall and F-measures.
require(RCurl) # For downloading file from a given URL.
require(twitteR) # Used for the 'twitter' class.
require(sentiment) # For bayes and voter classifiers.
source("sent140.R") # Used for the Sentiment 140 API. Can be downloaded from here:
# Fetches the Sanders corpus index and stores it as `san.dat`.
#
# As far as is visible here, the function reads a header-less CSV into
# `san.dat` (strings kept as character, not factors), names its three columns
# "term", "clas" and "id", converts the ids to character and saves `san.dat`
# to "data/san_dat.Rdat".
#
# NOTE(review): the calls that own the `url`/`destfile` and `zipfile`/`files`
# arguments, the value of `file =`, and the closing brackets of this function
# are not visible in this copy of the file -- restore them from the original
# before running.
GetSandersCorpus = function() {
# Download the Sanders corpus and save it to disk.
url = "",
destfile = ""
) = "sanders-twitter-0.2/corpus.csv"
zipfile = "",
files = c(
# Read the corpus index; the file has no header row.
san.dat = read.csv(
file =,
stringsAsFactors = FALSE,
header = FALSE
# Name the three columns of the index.
colnames(san.dat) = c("term", "clas", "id")
# Keep the ids as character strings (presumably so long tweet ids are not
# treated as numbers -- confirm).
san.dat$id = as.character(san.dat$id)
# Persist the index so it does not have to be fetched again.
save(san.dat, file="data/san_dat.Rdat")
not.found.message = "Error: Not Found" # Message used to identify tweets no longer available.
# Drop the rows whose class label is "irrelevant".
san.dat = san.dat[san.dat$clas != "irrelevant", ]
# Fetches every tweet listed in `san.dat$id` that is not yet in `san.tweets`.
#
# For each missing id, the result of showStatus() is stored in the global
# list `san.tweets` under that id (via `<<-`). In the error handler, an id
# whose error text equals `not.found.message` is recorded as NA; any other
# failure is only reported.
#
# NOTE(review): the construct that the `error=` handler belongs to and the
# closing brackets of this function are not visible in this copy of the file.
FillSanTweets = function() {
# Pulls tweets using the API based on the Twitter ids.
# Only ids that are not already names of `san.tweets` are requested.
sapply(setdiff(san.dat$id, names(san.tweets)), function(tid) {
san.tweets[tid] <<- showStatus(tid)
print(paste("Successfully added tweet no", tid))
error=function(e) {
# NOTE(review): geterrmessage() returns the last error message recorded by R,
# which may not be the condition `e` handed to this handler;
# conditionMessage(e) reads it directly -- confirm which is intended.
msg = geterrmessage()
if(msg == not.found.message) {
print(paste("Tweet no", tid, "longer available"))
# Record NA so the id becomes a name in `san.tweets` and is not requested
# again by a later call.
san.tweets[tid] <<- NA
} else{
# Nothing is stored for this id in this branch, so it stays in the set of
# ids to fetch.
print(paste("FAILED to fetch Tweet no.", tid))
# NOTE(review): a 15 second pause on its own allows up to 240 calls per hour;
# 20 seconds would be needed to guarantee 180 -- confirm the intended limit.
Sys.sleep(15) # delay the next request so we stay within the 180 calls per hour restriction.
# Create a new tweet list (empty).
san.tweets = list()
# Start populating the list by calling FillSanTweets.
# NOTE(review): the FillSanTweets() call itself is not visible in this copy.
# NOTE(review): the read.csv() line below is commented out, so `dat` must
# already exist in the session when the following statements run.
#dat = read.csv("data/full-corpus.csv", stringsAsFactors=FALSE)
# Name the five corpus columns and keep the tweet ids as character strings.
colnames(dat) = c("term", "sent", "tid", "date", "txt")
dat$tid = as.character(dat$tid)
# "Purge" the irrelevant entries
dat = dat[dat$sent!="irrelevant",]
# Clean the tweet text with ScrubTweets(), which is not defined in the visible
# part of this file.
dat$txt = ScrubTweets(dat$txt)
# Bind the classifier outputs column-wise: the "voter" result, the "bayes"
# result, then column 2 of the sentiment() result.
# NOTE(review): sentiment() is presumably provided by sent140.R -- confirm.
dat = cbind(stringsAsFactors=FALSE,
classify_polarity(dat$txt, algorithm="voter"),
classify_polarity(dat$txt, algorithm="bayes"),
sentiment(dat$txt)[ , 2]
# Name the appended columns: 6-9 for voter, 10-13 for bayes, 14 for sent140.
# NOTE(review): the fourth name of each group is an empty string in this copy;
# the original name appears to be missing.
colnames(dat)[6:9] = c("v.pos", "v.neg", "v.ratio", "")
colnames(dat)[10:13] = c("b.pos", "b.neg", "b.ratio", "")
colnames(dat)[14] = "sent140"
# Convert the ratio columns to numeric.
dat$v.ratio = as.numeric(dat$v.ratio)
dat$b.ratio = as.numeric(dat$b.ratio)
# Percentage distribution of labels for the manual annotation and for each
# classifier.
#
# For every label source, the label counts from table() are divided by
# nrow(dat), multiplied by 100 and rounded to two decimals. Reads the global
# `dat`.
#
# NOTE(review): the column names after `dat$` for "voter" and "bayes" and the
# closing brackets are not visible in this copy of the file.
Distribution = function() {
# Display the percentage distribution across categories.
sapply(list("manual"=dat$sent, "voter"=dat$, "bayes"=dat$, "sent140"=dat$sent140),
function(x) {
# Share of all rows of `dat` per label, in percent.
round(table(x) / nrow(dat) * 100, digits=2)
# Cross-tabulates the manual labels (`dat$sent`) against each classifier's
# labels, with one table each for "voter", "bayes" and "sent140". Reads the
# global `dat`.
#
# NOTE(review): the container call holding the three tables, the column names
# after `dat$` for "voter" and "bayes", and the closing brackets are not
# visible in this copy of the file.
ConfusionMatrices = function() {
# Calculate the confusion matrices for the three different algorithms we
# are benchmarking.
# In each table the first argument is the manual label and the second is the
# classifier's label.
"voter" = table(dat$sent, dat$,
"bayes" = table(dat$sent, dat$,
"sent140" = table(dat$sent, dat$sent140)
# Precision of each classifier for each of the classes "negative", "neutral"
# and "positive". Reads the global `dat`.
#
# Precision for a class is computed as the share of rows the classifier
# labelled with that class whose manual label (`dat$sent`) is the same class.
#
# NOTE(review): the column names after `dat$` for "voter" and "bayes", the
# call collecting the three values, and the closing brackets are not visible
# in this copy of the file.
PrecisionRate = function() {
# Calculate the precision of the three classifiers with respect to the three
# categories.
sapply(c("negative", "neutral", "positive"), function(clas) {
# Rows are filtered on the classifier's label, then compared with the manual
# label.
"voter" = mean(dat[dat$ == clas, ]$sent == clas),
"bayes" = mean(dat[dat$ == clas, ]$sent == clas),
"sent140" = mean(dat[dat$sent140 == clas, ]$sent == clas)
# Recall of each classifier for each of the classes "negative", "neutral" and
# "positive". Reads the global `dat`.
#
# Recall for a class is computed as the share of rows manually labelled with
# that class (`dat$sent`) that the classifier labelled with the same class.
#
# NOTE(review): the column names after `$` for "voter" and "bayes", the call
# collecting the three values, and the closing brackets are not visible in
# this copy of the file.
RecallRate = function() {
# Calculate the recall of the three classifiers with respect to the three
# categories.
sapply(c("negative", "neutral", "positive"), function(bin) {
# Rows are filtered on the manual label, then compared with the classifier's
# label.
"voter" = mean(dat[dat$sent == bin, ]$ == bin),
"bayes" = mean(dat[dat$sent == bin, ]$ == bin),
"sent140" = mean(dat[dat$sent == bin, ]$sent140 == bin)
# F-measure for every classifier / class combination, computed element-wise
# from the results of PrecisionRate() and RecallRate().
#
# NOTE(review): the closing brace of this function is not visible in this copy
# of the file.
FMeasures = function() {
# Calculates the f-measures for every classifier / class combination.
p = PrecisionRate()
r = RecallRate()
# Return the harmonic mean of precision and recall
# Where precision and recall are both 0 this evaluates 0 / 0, which is NaN.
2 * p * r / (p + r)
