Just discovered a nice little way to get custom summary stats with {skimr} by rOpenSci. I use it mainly in the R console for quick summaries:
- different summary stats for different classes
- distribution (unicode)
- `head` & `tail` of raw data
- easily customizable
Initial draft. Use at your own risk. Feel free to use it in your interactive package, .Rprofile, etc.
- Name the functions as you like, e.g. `su` stands for summary
- Easily customize summary stats by classes
See rOpenSci: The skimr package for details.
#' Custom {skimr} summary stats
#'
#' `su` stands for _summary_. It builds a custom skimming function with
#' `skimr::skim_with()` and applies it to `...`. Numeric columns get the mean,
#' standard deviation, 5% and 95% quantiles, an inline (unicode) histogram and
#' the first and last raw values. The other listed column types keep their
#' default {skimr} skimmers and gain the same head/tail view of the raw data.
#'
#' @details `su` and `su2` are the same except that `su2` defaults `base` to
#'   the _missingness_ functions `skimr::n_missing` and `skimr::complete_rate`.
#'
#' @param n_bins Number of histogram bars, passed to `skimr::inline_hist()`.
#' @param digits Integer number of decimal places passed to `round()` for the
#'   numeric `mean` and `sd`. Negative values are allowed.
#' @param n_head_tail Number of raw values taken from each of `head()` and
#'   `tail()`.
#' @param base `skimr::sfl` that sets skimmers for all column types.
#' @param ... Arguments passed on to the skimming function created by
#'   `skimr::skim_with()`, e.g. the data frame to summarise.
#'
#' @return The result of calling the skimming function created by
#'   `skimr::skim_with()` on `...`.
#'
#' @name custom_summary
NULL
#' @rdname custom_summary
#' @export
su <- function(..., n_bins = 13, digits = 2, n_head_tail = 3, base = skimr::sfl()) {
  # Build a skimming function whose per-type skimmers are fully replaced
  # (`append = FALSE`), then apply it to whatever was passed in `...`.
  #
  # `stats::` and `utils::` are spelled out so the function also works where
  # those packages are not attached (e.g. inside a package namespace).
  func <- skimr::skim_with(
    append = FALSE,
    base = base,
    numeric = skimr::sfl(
      # `digits` is applied to mean and sd only; quantiles are left unrounded.
      mean = function(x) round(mean(x, na.rm = TRUE), digits),
      sd = function(x) round(stats::sd(x, na.rm = TRUE), digits),
      `5%` = function(x) stats::quantile(x, .05, na.rm = TRUE, names = FALSE),
      `95%` = function(x) stats::quantile(x, .95, na.rm = TRUE, names = FALSE),
      dist = function(x) skimr::inline_hist(x, n_bins = n_bins),
      # First and last `n_head_tail` raw values, collapsed into one string.
      head_tail = function(x) {
        paste(
          c(utils::head(x, n_head_tail), utils::tail(x, n_head_tail)),
          collapse = " "
        )
      }
    ),
    # Every other type: default skimmers plus the head/tail preview.
    factor = skim_default_headtail("factor", n_head_tail),
    character = skim_default_headtail("character", n_head_tail),
    Date = skim_default_headtail("Date", n_head_tail),
    list = skim_default_headtail("list", n_head_tail),
    logical = skim_default_headtail("logical", n_head_tail),
    AsIs = skim_default_headtail("AsIs", n_head_tail),
    complex = skim_default_headtail("complex", n_head_tail),
    difftime = skim_default_headtail("difftime", n_head_tail),
    POSIXct = skim_default_headtail("POSIXct", n_head_tail),
    # No trailing comma here: it would pass an empty argument through `...`.
    ts = skim_default_headtail("ts", n_head_tail)
  )
  func(...)
}
#' @rdname custom_summary
#' @export
su2 <- function(..., n_bins = 13, digits = 2, n_head_tail = 3,
                base = skimr::sfl(n_missing = skimr::n_missing,
                                  complete_rate = skimr::complete_rate)) {
  # Same as `su()`; only the default `base` differs, adding the missingness
  # skimmers for every column type. `n_missing` is referenced with `::`
  # (not `:::`), matching `complete_rate` next to it.
  su(..., n_bins = n_bins, digits = digits, n_head_tail = n_head_tail, base = base)
}
# Internal helper: the default skimmers of `skim_type` plus a `head_tail`
# skimmer that shows the first and last `n_head_tail` raw values.
skim_default_headtail <- function(skim_type, n_head_tail) {
  defaults <- skimr::get_default_skimmers(skim_type)

  # Raw-data preview: leading and trailing values joined into one string.
  preview <- function(x) {
    # Workaround: `c()` on factors can produce integers, so convert first.
    if (skim_type == 'factor') x <- as.character(x)
    ends <- c(head(x, n_head_tail), tail(x, n_head_tail))
    paste(ends, collapse = ' ')
  }
  extra <- skimr::sfl(head_tail = preview)

  # Merge the default functions with the preview and rebuild one `sfl`.
  combined <- c(defaults[[1]], extra$funs)
  do.call(skimr::sfl, combined)
}
library(skimr)
# Demo ----
# Extend `iris` with a character, a Date and a time-series column so that
# several of the custom skimmers are exercised.
n <- nrow(iris)
my_data <- data.frame(
  iris,
  alphabets = sample(letters, n, replace = TRUE),
  date = seq.Date(Sys.Date() - n + 1, Sys.Date(), by = "day"),
  time_series = ts(seq_len(n), frequency = 4)
)

# Summary without, then with, the missingness columns.
su(my_data)
su2(my_data)