jadianes · August 13, 2015 11:23
diff --git a/server.R b/server.R
 library(shiny)
 library(tm)
 library(SnowballC)
 library(randomForest)

 # Set the global `mc.cores` option to 1 for the whole session.
 # NOTE(review): presumably this keeps parallel-capable text-mining steps on
 # a single core -- confirm against the tm/parallel versions in use.
 options(mc.cores=1)

 #' Train a sentiment classifier restricted to the vocabulary of new data
 #'
 #' Cleans the `Text` column of `new_data_df` (lower-casing, punctuation and
 #' English stop-word removal, whitespace stripping, stemming), builds its
 #' document-term matrix and keeps only the terms that also occur in the
 #' global training matrix `train_dtm_df`. A random forest is then trained on
 #' the global `train_data_df` sentiment labels using just those shared terms
 #' as predictors.
 #'
 #' @param new_data_df Data frame with a `Text` column holding the documents.
 #' @param sparsity Maximal allowed sparsity passed to `removeSparseTerms()`
 #'   for the new document-term matrix.
 #' @return A list of length two: the fitted `randomForest` model (element 1,
 #'   also reachable as `$model`) and the new-data term-count data frame
 #'   limited to the shared terms (element 2, also reachable as `$new_data`).
 build_model <- function(new_data_df, sparsity) {
    # Build and clean the corpus for the new documents
    new_corpus <- Corpus(VectorSource(new_data_df$Text))
    new_corpus <- tm_map(new_corpus, content_transformer(tolower))
    new_corpus <- tm_map(new_corpus, removePunctuation)
    new_corpus <- tm_map(new_corpus, removeWords, stopwords("english"))
    new_corpus <- tm_map(new_corpus, stripWhitespace)
    new_corpus <- tm_map(new_corpus, stemDocument)

    # Document-term matrix as a data frame with syntactically valid names
    new_dtm <- DocumentTermMatrix(new_corpus)
    new_dtm <- removeSparseTerms(new_dtm, sparsity)
    new_dtm_df <- as.data.frame(as.matrix(new_dtm))
    colnames(new_dtm_df) <- make.names(colnames(new_dtm_df))

    # Terms present in both the training data and the new data
    common_names <- intersect(colnames(train_dtm_df), colnames(new_dtm_df))
    if (length(common_names) == 0) {
        # Without shared terms there is no predictor to train on
        stop(
            "The new text shares no terms with the training data; ",
            "try a higher sparsity value.",
            call. = FALSE
        )
    }

    # Plain `[` indexing instead of subset(): subset() evaluates `select`
    # with the column names in scope and is meant for interactive use.
    # Logical masks keep each data frame's original column order.
    keep_new <- names(new_dtm_df) %in% common_names
    new_dtm_df <- new_dtm_df[, keep_new, drop = FALSE]

    keep_train <- names(train_dtm_df) %in% common_names
    model_train_data_df <- cbind(
        train_data_df,
        train_dtm_df[, keep_train, drop = FALSE]
    )
    # The raw text must not be used as a predictor
    model_train_data_df$Text <- NULL

    # Train the classifier on the shared terms only
    model <- randomForest(Sentiment ~ ., data = model_train_data_df, ntree = 50)

    # Names are additive: callers indexing with [[1]] / [[2]] keep working
    list(model = model, new_data = new_dtm_df)
 }


 # Server logic: scores every line of the uploaded file with a classifier
 # built by build_model() and exposes the scores as a table and a plot.
 shinyServer(function(input, output) {

    # Scored data: the uploaded text plus a `Prob` column taken from the
    # second column of the class-probability matrix.
    # NOTE(review): presumably the second class is the positive sentiment
    # label -- confirm against the factor levels of the training data.
    # Yields NULL until a file has been uploaded.
    results <- reactive({
        inFile <- input$file1

        if (is.null(inFile)) {
            return(NULL)
        }

        # Load input data: one document per line, no header, no quoting
        new_data_df <- read.csv(
            inFile$datapath,
            sep = "\t",
            header = FALSE,
            quote = "",
            stringsAsFactors = FALSE,
            col.names = c("Text")
        )

        # Re-trains whenever the file or the sparsity slider changes
        model_and_data <- build_model(new_data_df, input$sparsity)

        pred <- predict(
            model_and_data[[1]],
            newdata = model_and_data[[2]],
            type = "prob"
        )

        new_data_df$Prob <- pred[, 2]

        # Return the scored data frame
        new_data_df
    })

    # Detail table with every document and its probability
    output$contents <- renderTable({
        results()
    })

    # Density of the 0/1 indicator "Prob exceeds the chosen threshold"
    output$distribution <- renderPlot({
        scored <- results()

        # density() needs at least two observations to pick a bandwidth,
        # so skip the plot for missing or single-row input.
        if (is.null(scored) || nrow(scored) < 2) {
            return(NULL)
        }

        d <- density(as.numeric(scored$Prob > input$threshold))
        plot(
            d,
            xlim = c(0, 1),
            main = paste0("Sentiment Distribution (Prob > ", input$threshold, ")")
        )
        polygon(d, col = "lightgrey", border = "lightgrey")
        # Mark the selected threshold on the x axis
        abline(v = input$threshold, col = "blue")
    })
 })

 # Initialisation code: runs once, when the web app loads.

 # Load the labelled training data (tab-separated, no header row)
 train_data_df <- read.csv(
    file = "train_data.tsv",
    sep = "\t",
    quote = "",
    header = FALSE,
    stringsAsFactors = FALSE,
    col.names = c("Sentiment", "Text")
 )
 # The classifier needs the label as a factor
 train_data_df$Sentiment <- as.factor(train_data_df$Sentiment)

 # Create the training corpus once so that build_model() can re-use it
 train_corpus <- Corpus(VectorSource(train_data_df$Text))
 train_corpus <- tm_map(train_corpus, content_transformer(tolower))
 train_corpus <- tm_map(train_corpus, removePunctuation)
 train_corpus <- tm_map(train_corpus, removeWords, stopwords("english"))
 train_corpus <- tm_map(train_corpus, stripWhitespace)
 train_corpus <- tm_map(train_corpus, stemDocument)

 # Create the training document-term matrix, dropping terms whose sparsity
 # exceeds 0.995, and expose it as a data frame with valid column names
 train_dtm <- DocumentTermMatrix(train_corpus)
 train_dtm <- removeSparseTerms(train_dtm, 0.995)
 train_dtm_df <- data.frame(as.matrix(train_dtm))
 colnames(train_dtm_df) <- make.names(colnames(train_dtm_df))
diff --git a/ui.R b/ui.R
 library(shiny)

 # Page layout: upload control and two sliders in the sidebar, the density
 # plot in the main panel, and the full results table underneath.
 shinyUI(fluidPage(

    # Page heading
    headerPanel(title = "Text Sentiment Analyser"),

    sidebarLayout(
        # Controls: input file, probability threshold, term sparsity
        sidebarPanel(
            fileInput(
                inputId = "file1",
                label = "Choose text File",
                accept = c(
                    "text/tsv",
                    "text/tab-separated-values,text/plain",
                    ".tsv"
                )
            ),
            tags$hr(),
            sliderInput(
                inputId = "threshold",
                label = "Positive sentiment threshold",
                min = 0.1,
                max = 0.99,
                value = 0.5
            ),
            tags$hr(),
            sliderInput(
                inputId = "sparsity",
                label = "Max. term sparsity",
                min = 0.1,
                max = 0.99,
                value = 0.95
            )
        ),

        # Plot rendered by the server as `distribution`
        mainPanel(
            plotOutput(outputId = "distribution")
        )
    ),
    tags$hr(),
    # Full-width row with the table rendered by the server as `contents`
    fluidRow(
        column(
            width = 12,
            tableOutput(outputId = "contents")
        )
    )
 ))
	library(shiny)
	library(tm)
	library(SnowballC)
	library(randomForest)

	# Set the global `mc.cores` option to 1 for the whole session.
	# NOTE(review): presumably this keeps parallel-capable text-mining steps on
	# a single core -- confirm against the tm/parallel versions in use.
	options(mc.cores=1)

	#' Train a sentiment classifier restricted to the vocabulary of new data
	#'
	#' Cleans the `Text` column of `new_data_df` (lower-casing, punctuation and
	#' English stop-word removal, whitespace stripping, stemming), builds its
	#' document-term matrix and keeps only the terms that also occur in the
	#' global training matrix `train_dtm_df`. A random forest is then trained on
	#' the global `train_data_df` sentiment labels using just those shared terms
	#' as predictors.
	#'
	#' @param new_data_df Data frame with a `Text` column holding the documents.
	#' @param sparsity Maximal allowed sparsity passed to `removeSparseTerms()`
	#'   for the new document-term matrix.
	#' @return A list of length two: the fitted `randomForest` model (element 1,
	#'   also reachable as `$model`) and the new-data term-count data frame
	#'   limited to the shared terms (element 2, also reachable as `$new_data`).
	build_model <- function(new_data_df, sparsity) {
	    # Build and clean the corpus for the new documents
	    new_corpus <- Corpus(VectorSource(new_data_df$Text))
	    new_corpus <- tm_map(new_corpus, content_transformer(tolower))
	    new_corpus <- tm_map(new_corpus, removePunctuation)
	    new_corpus <- tm_map(new_corpus, removeWords, stopwords("english"))
	    new_corpus <- tm_map(new_corpus, stripWhitespace)
	    new_corpus <- tm_map(new_corpus, stemDocument)

	    # Document-term matrix as a data frame with syntactically valid names
	    new_dtm <- DocumentTermMatrix(new_corpus)
	    new_dtm <- removeSparseTerms(new_dtm, sparsity)
	    new_dtm_df <- as.data.frame(as.matrix(new_dtm))
	    colnames(new_dtm_df) <- make.names(colnames(new_dtm_df))

	    # Terms present in both the training data and the new data
	    common_names <- intersect(colnames(train_dtm_df), colnames(new_dtm_df))
	    if (length(common_names) == 0) {
	        # Without shared terms there is no predictor to train on
	        stop(
	            "The new text shares no terms with the training data; ",
	            "try a higher sparsity value.",
	            call. = FALSE
	        )
	    }

	    # Plain `[` indexing instead of subset(): subset() evaluates `select`
	    # with the column names in scope and is meant for interactive use.
	    # Logical masks keep each data frame's original column order.
	    keep_new <- names(new_dtm_df) %in% common_names
	    new_dtm_df <- new_dtm_df[, keep_new, drop = FALSE]

	    keep_train <- names(train_dtm_df) %in% common_names
	    model_train_data_df <- cbind(
	        train_data_df,
	        train_dtm_df[, keep_train, drop = FALSE]
	    )
	    # The raw text must not be used as a predictor
	    model_train_data_df$Text <- NULL

	    # Train the classifier on the shared terms only
	    model <- randomForest(Sentiment ~ ., data = model_train_data_df, ntree = 50)

	    # Names are additive: callers indexing with [[1]] / [[2]] keep working
	    list(model = model, new_data = new_dtm_df)
	}


	# Server logic: scores every line of the uploaded file with a classifier
	# built by build_model() and exposes the scores as a table and a plot.
	shinyServer(function(input, output) {

	    # Scored data: the uploaded text plus a `Prob` column taken from the
	    # second column of the class-probability matrix.
	    # NOTE(review): presumably the second class is the positive sentiment
	    # label -- confirm against the factor levels of the training data.
	    # Yields NULL until a file has been uploaded.
	    results <- reactive({
	        inFile <- input$file1

	        if (is.null(inFile)) {
	            return(NULL)
	        }

	        # Load input data: one document per line, no header, no quoting
	        new_data_df <- read.csv(
	            inFile$datapath,
	            sep = "\t",
	            header = FALSE,
	            quote = "",
	            stringsAsFactors = FALSE,
	            col.names = c("Text")
	        )

	        # Re-trains whenever the file or the sparsity slider changes
	        model_and_data <- build_model(new_data_df, input$sparsity)

	        pred <- predict(
	            model_and_data[[1]],
	            newdata = model_and_data[[2]],
	            type = "prob"
	        )

	        new_data_df$Prob <- pred[, 2]

	        # Return the scored data frame
	        new_data_df
	    })

	    # Detail table with every document and its probability
	    output$contents <- renderTable({
	        results()
	    })

	    # Density of the 0/1 indicator "Prob exceeds the chosen threshold"
	    output$distribution <- renderPlot({
	        scored <- results()

	        # density() needs at least two observations to pick a bandwidth,
	        # so skip the plot for missing or single-row input.
	        if (is.null(scored) || nrow(scored) < 2) {
	            return(NULL)
	        }

	        d <- density(as.numeric(scored$Prob > input$threshold))
	        plot(
	            d,
	            xlim = c(0, 1),
	            main = paste0("Sentiment Distribution (Prob > ", input$threshold, ")")
	        )
	        polygon(d, col = "lightgrey", border = "lightgrey")
	        # Mark the selected threshold on the x axis
	        abline(v = input$threshold, col = "blue")
	    })
	})

	# Initialisation code: runs once, when the web app loads.

	# Load the labelled training data (tab-separated, no header row)
	train_data_df <- read.csv(
	    file = "train_data.tsv",
	    sep = "\t",
	    quote = "",
	    header = FALSE,
	    stringsAsFactors = FALSE,
	    col.names = c("Sentiment", "Text")
	)
	# The classifier needs the label as a factor
	train_data_df$Sentiment <- as.factor(train_data_df$Sentiment)

	# Create the training corpus once so that build_model() can re-use it
	train_corpus <- Corpus(VectorSource(train_data_df$Text))
	train_corpus <- tm_map(train_corpus, content_transformer(tolower))
	train_corpus <- tm_map(train_corpus, removePunctuation)
	train_corpus <- tm_map(train_corpus, removeWords, stopwords("english"))
	train_corpus <- tm_map(train_corpus, stripWhitespace)
	train_corpus <- tm_map(train_corpus, stemDocument)

	# Create the training document-term matrix, dropping terms whose sparsity
	# exceeds 0.995, and expose it as a data frame with valid column names
	train_dtm <- DocumentTermMatrix(train_corpus)
	train_dtm <- removeSparseTerms(train_dtm, 0.995)
	train_dtm_df <- data.frame(as.matrix(train_dtm))
	colnames(train_dtm_df) <- make.names(colnames(train_dtm_df))
	library(shiny)

	# Page layout: upload control and two sliders in the sidebar, the density
	# plot in the main panel, and the full results table underneath.
	shinyUI(fluidPage(

	    # Page heading
	    headerPanel(title = "Text Sentiment Analyser"),

	    sidebarLayout(
	        # Controls: input file, probability threshold, term sparsity
	        sidebarPanel(
	            fileInput(
	                inputId = "file1",
	                label = "Choose text File",
	                accept = c(
	                    "text/tsv",
	                    "text/tab-separated-values,text/plain",
	                    ".tsv"
	                )
	            ),
	            tags$hr(),
	            sliderInput(
	                inputId = "threshold",
	                label = "Positive sentiment threshold",
	                min = 0.1,
	                max = 0.99,
	                value = 0.5
	            ),
	            tags$hr(),
	            sliderInput(
	                inputId = "sparsity",
	                label = "Max. term sparsity",
	                min = 0.1,
	                max = 0.99,
	                value = 0.95
	            )
	        ),

	        # Plot rendered by the server as `distribution`
	        mainPanel(
	            plotOutput(outputId = "distribution")
	        )
	    ),
	    tags$hr(),
	    # Full-width row with the table rendered by the server as `contents`
	    fluidRow(
	        column(
	            width = 12,
	            tableOutput(outputId = "contents")
	        )
	    )
	))