Last active
January 16, 2020 15:31
-
-
Save anamariaelek/ac70548f82c21f1141ac0b58dc34054a to your computer and use it in GitHub Desktop.
Nanopore sequencing summary
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(shiny) | |
require(shinydashboard) | |
require(data.table) | |
require(ggplot2) | |
require(ggpubr) | |
require(stringr) | |
require(DT) | |
require(kableExtra) | |
require(knitr) | |
require(plotly) | |
data <- fread("http://hex.bioinfo.hr/~kristian/transfer/sequencing_summary.txt") | |
spuzve <- tolower(paste(c("Suberites","domuncula","Sdo", | |
"Eunapius","subterraneus","Esu", | |
"Ephydatia","muelleri","Emu", | |
"Clathrina","clathrus","Ccl"), collapse = "|")) | |
Spuzve_short <- c("Suberites|Sdo", | |
"Eunapius|Esu", | |
"Ephydatia|Emu", | |
"Clathrina|Ccl") | |
Spuzve_long <- c("Suberites domuncula", | |
"Eunapius subterraneus", | |
"Ephydatia muelleri", | |
"Clathrina clathrus") | |
data[, ':='(date = as.Date(str_extract(filename, "\\d{8}"), "%Y%m%d"), | |
flowcell = str_extract(filename, "F[[:alnum:]]+"), | |
species = str_extract(tolower(filename), spuzve))] | |
dates <- data[,.(date_init = unique(date)[1]), .(flowcell)] | |
data[dates, on = 'flowcell', date_init := i.date_init] | |
data[, ID := paste(date_init, species, sep = "-")] | |
setkey(data, ID) | |
cols <- c("filename", "num_events_template", "passes_filtering", | |
"sequence_length_template", "mean_qscore_template", | |
"date", "flowcell", "species", "date_init", "ID") | |
saveRDS(data[,..cols], "sequencing_summary.RDS") | |
sumfilt <- data[, | |
.(num=.N), | |
.(ID, passes_filtering, flowcell, species)][, | |
':='(N = sum(num), | |
perc = round(num/sum(num),2)),ID] | |
sumfiltcast <- dcast.data.table(sumfilt, | |
ID ~ passes_filtering, | |
value.var = c("species","flowcell","N","num","perc")) | |
sumfiltcast[, `:=`(N_FALSE = NULL, flowcell_FALSE=NULL, species_FALSE = NULL)] | |
old <- c("species_TRUE", "flowcell_TRUE", "N_TRUE", "num_TRUE", "num_FALSE", "perc_TRUE", "perc_FALSE") | |
new <- c("species", "flowcell", "N", "N_above", "N_below", "percent_above", "percent_below") | |
setnames(sumfiltcast, old, new) | |
setcolorder(sumfiltcast, c("ID", new)) | |
sumnuls <- data[num_events_template == 0, .(N_0=.N), .(ID)] | |
sumall <- sumfiltcast[sumnuls][, N0_percent := round(N_0/N, 2)] | |
sp <- lapply(sumall$species, function(x) { | |
pos <- grep(x, Spuzvetax, ignore.case = TRUE) | |
Spuzve_long[pos] | |
}) | |
sumall[, species := unlist(sp)] | |
sumdt <- data[sequence_length_template != 0, | |
.(min_length = min(sequence_length_template), | |
median_length = median(sequence_length_template), | |
max_length = max(sequence_length_template), | |
total_length = sum(sequence_length_template), | |
min_quality = min(mean_qscore_template), | |
median_quality = median(mean_qscore_template), | |
max_quality = max(mean_qscore_template)), | |
by = ID] | |
sumDT <- sumall[sumdt] | |
saveRDS(sumDT, "sumDT.RDS") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# # GLOBAL # # | |
if (!require(shiny)) install.packages("shiny") | |
if (!require(shinydashboard)) install.packages("shinydashboard") | |
if (!require(data.table)) install.packages("data.table") | |
if (!require(ggplot2)) install.packages("ggplot2") | |
if (!require(ggpubr)) install.packages("ggpubr") | |
if (!require(stringr)) install.packages("stringr") | |
if (!require(DT)) install.packages("DT") | |
if (!require(kableExtra)) install.packages("kableExtra") | |
if (!require(knitr)) install.packages("knitr") | |
if (!require(plotly)) install.packages("plotly") | |
require(shiny) | |
require(shinydashboard) | |
require(data.table) | |
require(ggplot2) | |
require(ggpubr) | |
require(stringr) | |
require(DT) | |
require(kableExtra) | |
require(knitr) | |
require(plotly) | |
options(shiny.sanitize.errors = FALSE) | |
data <- readRDS("sequencing_summary.RDS") | |
sumDT <- readRDS("sumDT.RDS") | |
Spuzvetax <- c("Suberites domuncula (Sdo)", | |
"Eunapius subterraneus (Esu)", | |
"Ephydatia muelleri (Emu)", | |
"Clathrina clathrus (Ccl)") | |
# # UI # # | |
header <- dashboardHeader(title = "Nanopore Summary") | |
sidebar <- dashboardSidebar( | |
sidebarMenu( | |
menuItem("All runs", tabName = "allruns", icon = icon("th")), | |
menuItem("Selected run", tabName = "onerun", icon = icon("th-list")), | |
br() | |
), | |
sliderInput("bins", "bins in histograms", | |
min = 10, max = 100, value = 30, step = 5 | |
) | |
) | |
body <- dashboardBody( | |
tabItems( | |
tabItem(tabName = "allruns", | |
h2("Summary of all sequencing runs"), | |
hr(), | |
fluidRow( | |
box( | |
title = "Table summary", | |
width = 12, | |
status = "warning", solidHeader = TRUE, | |
collapsible = TRUE, | |
DT::dataTableOutput('outsumDT'), | |
checkboxGroupInput( | |
"show_vars", | |
label = "Choose columns:", | |
choices = names(sumDT), | |
selected = names(sumDT), | |
inline = TRUE | |
), | |
downloadButton( | |
"downloadData", | |
label = "Download" | |
) | |
) | |
), | |
fluidRow( | |
box( | |
title = "Quality", | |
width = 6, collapsible = TRUE, collapsed = FALSE, | |
status = "warning", solidHeader = TRUE, | |
plotlyOutput('qualplot'), | |
br(), | |
textOutput('qualthres'), | |
checkboxInput( | |
"rmnulls", | |
"Remove reads with length 0", | |
value = FALSE, | |
width = NULL) | |
), | |
box( | |
title = "Length", | |
width = 6, collapsible = TRUE, collapsed = FALSE, | |
status = "warning", solidHeader = TRUE, | |
plotlyOutput('lenplot'), | |
br(), | |
checkboxInput( | |
"loglens", | |
"Lengths on log scale (removes reads with length 0)", | |
value = TRUE, | |
width = NULL) | |
) | |
) | |
), | |
tabItem(tabName = "onerun", | |
h2("Summary of a single sequencing run"), | |
hr(), | |
fluidRow( | |
valueBoxOutput("info", width = 4), | |
valueBoxOutput("filterbox", width = 4), | |
valueBoxOutput("flowcell", width = 4) | |
), | |
fluidRow( | |
box( | |
title = "Reads summary", | |
width = 12, | |
status = "warning", solidHeader = TRUE, | |
background = "yellow", | |
collapsible = FALSE, | |
tableOutput('outrowDT') | |
) | |
), | |
fluidRow( | |
tabBox( | |
title = "Reads length", | |
side = "right", width = 6, | |
tabPanel("histogram", plotOutput('histlen')), | |
tabPanel("density", plotOutput('denslen')) | |
), | |
tabBox( | |
title = "Reads quality", | |
side = "right", width = 6, | |
tabPanel("histogram", plotOutput('histq')), | |
tabPanel("density", plotOutput('densq')) | |
) | |
), | |
fluidRow( | |
tabBox( | |
title = "Reads length vs reads quality", | |
side = "right", width = 6, | |
#tabPanel("hex", plotOutput("hex")), | |
tabPanel("density", plotOutput("dens2d")) | |
) | |
) | |
) | |
) | |
) | |
ui <- dashboardPage(header, sidebar, body) | |
# # SERVER # # | |
server <- function(input, output) { | |
# RUNS TABLE SUMMARY | |
output$outsumDT <- renderDT( | |
sumDT[, input$show_vars, with=FALSE], | |
options = list( | |
scrollX = TRUE, | |
pageLength = 20, | |
lengthMenu = c(10, 20, 50)), | |
selection = list(mode = 'single', selected = 1), | |
escape = FALSE, | |
callback = JS( | |
'table.on("click.dt", "tr", function() { | |
tabs = $(".sidebar-menu li a"); | |
$(tabs[1]).click();})') | |
) | |
output$downloadData <- downloadHandler( | |
filename = "nanopore_sequencing_summary.csv", | |
content = function(file) | |
{ | |
write.csv(sumDT[,input$show_vars, with=FALSE], | |
file, row.names = FALSE) | |
} | |
) | |
# RUNS PLOT SUMMARY | |
# For excluding reads with length 0 | |
pData <- reactive({ | |
if (input$rmnulls == TRUE) | |
data[num_events_template != 0] | |
else data | |
}) | |
thres <- data[passes_filtering == TRUE, | |
min(mean_qscore_template)] | |
l <- list( | |
x = 100, y = 0.5, | |
font = list( | |
family = "sans-serif", | |
size = 12 | |
) | |
) | |
output$qualplot <- renderPlotly({ | |
qp <- ggplot(pData(), | |
aes(x = mean_qscore_template, colour = ID)) + | |
geom_density(aes(y = ..count..)) + | |
scale_colour_hue(h = c(90, 300)) + | |
geom_vline(xintercept = thres, | |
colour = "red", alpha = 0.2) + | |
theme_pubr(border = TRUE, legend = "top") + | |
scale_y_continuous(labels = function(x) | |
format(x, digits = 1, scientific = TRUE)) + | |
guides(colour = guide_legend(title = NULL)) + | |
labs(x = "mean quality score") | |
qp <- ggplotly(qp, tooltip = c("colour")) | |
layout(qp, legend = l, margin = list(l = 100)) | |
}) | |
output$qualthres <- renderText( | |
"(red vertical line indicates quality threshold value)" | |
) | |
output$lenplot <- renderPlotly({ | |
pp <- ggplot(pData(), | |
aes(x = sequence_length_template, colour = ID)) + | |
geom_density(aes(y = ..count..)) + | |
scale_colour_hue(h = c(90, 300)) + | |
theme_pubr(border = TRUE, legend = "top") + | |
scale_y_continuous(labels = function(x) | |
format(x, digits = 1, scientific = TRUE)) + | |
guides(colour = guide_legend(title = NULL)) + | |
labs(x = "sequence length (bp)") | |
if (input$loglens == TRUE) | |
pp <- pp + scale_x_log10() | |
pp <- ggplotly(pp, tooltip = c("colour")) | |
layout(pp, legend = l, margin = list(l = 100)) | |
}) | |
# CHOSEN RUN | |
selectedData <- reactive({ | |
req(input$outsumDT_rows_selected) | |
row <- input$outsumDT_rows_selected | |
data[ID == sumDT[row[1], ID]] %>% | |
mutate("passes\nfiltering" = passes_filtering) %>% | |
select(-passes_filtering) | |
}) | |
output$info <- renderValueBox({ | |
req(input$outsumDT_rows_selected) | |
valueBox( | |
selectedData()$date_init[1], | |
grep(selectedData()$species[1], Spuzvetax, | |
ignore.case = TRUE, value = TRUE), | |
icon = icon("list"), | |
color = "purple" | |
) | |
}) | |
output$filterbox <- renderValueBox({ | |
req(input$outsumDT_rows_selected) | |
valueBox( | |
paste0( | |
round( | |
sumDT[input$outsumDT_rows_selected, | |
percent_above], | |
2) * 100, | |
"%"), | |
"passes filtering", | |
icon = icon("thumbs-up", lib = "glyphicon"), | |
color = "teal" | |
) | |
}) | |
output$flowcell <- renderValueBox({ | |
req(input$outsumDT_rows_selected) | |
valueBox( | |
unique(selectedData()$flowcell), | |
"flowcell", | |
icon = icon("cog"), | |
color = "light-blue" | |
) | |
}) | |
#output$outrowDT <- renderDT( | |
# sumDT[c(input$outsumDT_rows_selected, 1)[1], input$show_vars, with=FALSE], | |
# options = list(dom = 't', scrollX = TRUE, lengthChange = FALSE), | |
# selection = list(mode = 'none') | |
#) | |
output$outrowDT <- function(){ | |
req(input$outsumDT_rows_selected) | |
show_vars_less <- input$show_vars[ | |
!(input$show_vars %in% c("ID", "species", "flowcell", "percent_above", "percent_below"))] | |
sumDT[input$outsumDT_rows_selected, show_vars_less, with=FALSE] %>% | |
knitr::kable("html") %>% | |
kable_styling("striped") %>% | |
#scroll_box(width = "100%") %>% #throws error | |
row_spec(0, align = "center") %>% | |
row_spec(1, color = "white", background = "#f39c12", align = "center") | |
#add_header_above(c(" ", "Group 1" = 5, "Group 2" = 6)) | |
} | |
# CHOSEN RUN PLOTS | |
output$densq <- renderPlot({ | |
req(input$outsumDT_rows_selected) | |
ggplot(selectedData(), aes(x = mean_qscore_template, | |
fill = `passes\nfiltering`, | |
colour = `passes\nfiltering`)) + | |
geom_density(aes(y = ..count..), alpha = 0.5) + | |
theme_pubr(border = TRUE, legend = "right") + | |
labs(x = "mean quality score", y = "read count") | |
}) | |
output$denslen <- renderPlot({ | |
req(input$outsumDT_rows_selected) | |
ggplot(selectedData(), | |
aes(x = sequence_length_template, | |
fill = `passes\nfiltering`, | |
colour = `passes\nfiltering`)) + | |
geom_density(aes(y = ..count..), alpha = 0.5) + | |
theme_pubr(border = TRUE, legend = "right") + | |
labs(x = "sequence length (bp)", y = "read count") | |
}) | |
output$histq <- renderPlot({ | |
req(input$outsumDT_rows_selected) | |
ggplot(selectedData(), aes(x = mean_qscore_template, fill = `passes\nfiltering`)) + | |
geom_histogram(bins = input$bins, alpha = 0.5, position = "stack") + | |
theme_pubr(border = TRUE, legend = "right") + | |
labs(x = "mean quality score", y = "read count") | |
}) | |
output$histlen <- renderPlot({ | |
req(input$outsumDT_rows_selected) | |
ggplot(selectedData(), aes(x = sequence_length_template, fill = `passes\nfiltering`)) + | |
geom_histogram(bins = input$bins, alpha = 0.5, position = "dodge") + | |
theme_pubr(border = TRUE, legend = "right") + | |
labs(x = "sequence length (bp)", y = "read count") | |
}) | |
# output$hex <- renderPlot({ | |
# ggplot(selectedData(), aes(sequence_length_template, mean_qscore_template, | |
# colour = `passes\nfiltering` | |
# )) + | |
# geom_hex(alpha = 0.85) + | |
# #facet_wrap(~`passes\nfiltering`) + | |
# scale_fill_gradient(name = "counts", trans = "log", c(2, 10, 50, 250, 1250, 6000)) + | |
# theme_pubr(border = TRUE, legend = "right") | |
# }) | |
output$dens2d <- renderPlot({ | |
ggplot(selectedData(), aes(sequence_length_template, mean_qscore_template, | |
colour = `passes\nfiltering`)) + | |
geom_density2d(bins = input$bins) + | |
theme_pubr(border = TRUE, legend = "right") + | |
labs(x = "sequence length (bp)", y = "mean quality score") | |
}) | |
} | |
# # RUN # # | |
shinyApp(ui, server) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment