@fedeisas
Last active October 6, 2017 13:14
Word cloud from a Twitter search
# Required libraries
library('httr')
library('jsonlite')
library('tm')
library('wordcloud')
library('RColorBrewer')
library('SnowballC')
# Access keys (https://apps.twitter.com/)
# How to create a Twitter app: https://www.digitalocean.com/community/tutorials/how-to-create-a-twitter-app
consKey <- "I4YzkwOtWfdnSwuhxzIw"
consSecret <- "x9yy0SSPfstfIruterpNysda4HcAwXEPO3Foak3mdY"
token <- "21232107-EGlObzhxRvqnnljexKhekANQM1lF0PY4uqAurs0Cj"
tokenSecret <- "YPC4Rt7aa9tN8HdMDwanuyUnmWYuggBCsuGPTc299ITBb"
# Twitter uses OAuth for authentication
myapp <- oauth_app("twitter", key=consKey, secret=consSecret)
# Authenticate and obtain a signature
sig <- sign_oauth1.0(myapp, token=token, token_secret=tokenSecret)
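# Optional sanity check: the account/verify_credentials endpoint returns
# HTTP 200 when the keys and tokens are valid (uncomment to run)
# check <- GET('https://api.twitter.com/1.1/account/verify_credentials.json', sig)
# httr::status_code(check)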
# Query the API for 100 tweets on some topic
query <- 'messi'
# URL-encode the query so multi-word searches also work
search_url <- paste0(
  'https://api.twitter.com/1.1/search/tweets.json?count=100&lang=es&q=',
  URLencode(query, reserved=TRUE)
)
result <- GET(search_url, sig)
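# Fail fast if the request did not succeed (e.g. HTTP 429 when rate-limited)
stopifnot(status_code(result) == 200)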
# Parse the JSON response
json1 <- httr::content(result)
json2 <- jsonlite::fromJSON(toJSON(json1))
statuses <- json2$statuses
tweets <- unlist(statuses$text)
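# Quick look at the raw texts before cleaning (uncomment to run):
# head(tweets, 3)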
# Remove URLs
tweets <- gsub('(f|ht)tp\\S+\\s*', '', tweets)
# Remove @mentions
tweets <- gsub('@\\S+\\s*', '', tweets)
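# The retweet marker "RT" survives the regexes above and is not a Spanish
# stop word, so it would show up in the cloud; optionally drop it too:
# tweets <- gsub('\\bRT\\b', '', tweets)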
# Build a corpus
corpus <- Corpus(VectorSource(tweets))
# Convert to lowercase
corpus <- tm_map(corpus, content_transformer(tolower))
# Remove numbers
corpus <- tm_map(corpus, removeNumbers)
# Remove common (stop) words
corpus <- tm_map(corpus, removeWords, stopwords('spanish'))
# Remove punctuation
corpus <- tm_map(corpus, removePunctuation)
# Collapse extra whitespace
corpus <- tm_map(corpus, stripWhitespace)
# Stemming (optional; uses SnowballC, loaded above)
# corpus <- tm_map(corpus, stemDocument)
# Remove the original query terms
corpus <- tm_map(corpus, removeWords, unlist(strsplit(query, " ")))
# Build a term-document matrix
dtm <- TermDocumentMatrix(corpus)
m <- as.matrix(dtm)
# Word frequencies
v <- sort(rowSums(m), decreasing=TRUE)
# Data frame with the frequency of each word
d <- data.frame(word = names(v), freq=v)
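# Inspect the most frequent terms before plotting (uncomment to run):
# head(d, 10)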
# Draw the word cloud
wordcloud(words = d$word, freq = d$freq, min.freq = 2,
          max.words = 200, random.order = FALSE, rot.per = 0.35,
          colors = brewer.pal(8, "Dark2"))
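# To write the cloud to disk instead of the screen device (the filename is
# just an example), wrap the wordcloud() call in png()/dev.off():
# png('wordcloud.png', width = 800, height = 800)
# wordcloud(words = d$word, freq = d$freq, min.freq = 2,
#           max.words = 200, random.order = FALSE, rot.per = 0.35,
#           colors = brewer.pal(8, "Dark2"))
# dev.off()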