Skip to content

Instantly share code, notes, and snippets.

@NickCH-K
Last active February 22, 2020 09:57
Show Gist options
  • Save NickCH-K/0406b26f494c46bcef0b0abeacc7f1c8 to your computer and use it in GitHub Desktop.
Save NickCH-K/0406b26f494c46bcef0b0abeacc7f1c8 to your computer and use it in GitHub Desktop.
Factor Variable Summary Table
# Necessary functions
# (Never worked with functions in R before? Just run these lines; the functions will be stored in memory,
# sort of like when you load a package.)
cpct <- function(df, var, name, append) {
  # Build the count/percent summary block for one categorical column.
  #
  # Args:
  #   df:     data frame holding the column.
  #   var:    the column to summarize: one column name (or one position).
  #   name:   label shown in the header row for this variable.
  #   append: if TRUE, a blank (all-NA) row is added at the end, to act as a
  #           spacer before the next variable's block.
  #
  # Returns a tibble with columns Variable, Count and Percent: a header row
  # (label, number of non-missing rows, '100%') followed by one row per
  # category, each category label prefixed with '...'.

  # Translate a column position into its name so `.data[[var]]` can be used
  if (is.numeric(var)) {
    var <- names(df)[var]
  }
  if (!is.character(var) || length(var) != 1 || is.na(var) ||
      !(var %in% names(df))) {
    stop("`var` must identify exactly one column of `df`", call. = FALSE)
  }

  # Limit to nonmissings and see how many nonmissings there are
  df <- df %>% filter(!is.na(.data[[var]]))
  N <- nrow(df)

  # Get number and percent (relative to nonmissings) in each category.
  # Grouping on `Variable = .data[[var]]` avoids renaming the column, so an
  # existing column called "Variable" no longer causes a duplicate-name error,
  # and the percentage is derived from the counts themselves so a data column
  # named "N" cannot shadow the total.
  counts <- df %>%
    group_by(Variable = .data[[var]]) %>%
    summarize(Count = n()) %>%
    ungroup() %>%
    mutate(
      Percent = paste0(round(Count / sum(Count) * 100, 1), "%"),
      # shift the categories over a few spaces with ...
      Variable = paste0("...", as.character(Variable))
    )

  # Add the "header" row for this variable on top of its categories
  out <- bind_rows(
    tibble(Variable = name, Count = N, Percent = "100%"),
    counts
  )

  # If there's going to be another variable after this one, add a blank row
  if (append) {
    out <- add_row(out)
  }
  out
}
# Build a one- or two-column summary table of categorical variables and pass
# it to stargazer.
#
# Args:
#   df:       a data frame
#   vars:     the column names to summarize (default: every column of df)
#   varnames: what you want those columns referred to as in the table
#             (default: the column names themselves)
#   colbreak: how many variables to go through before going to column 2
#             (default: all of them, i.e. no second column)
#   ...:      further arguments passed on to stargazer()
#
# Returns the value of the stargazer() call made on the assembled table.
factortable <- function(df, vars = NULL, varnames = NULL, colbreak = NULL, ...) {
  # Defaults - all variables in data, varnames are just the colnames in data,
  # no second column
  if (is.null(vars)) {
    vars <- names(df)
  }
  if (is.null(varnames)) {
    varnames <- vars
  }
  if (is.null(colbreak)) {
    colbreak <- length(vars)
  }

  # Fail early with a clear message instead of an obscure error (or a
  # backwards 1:0 sequence) further down
  if (length(vars) == 0) {
    stop("there are no variables to summarize", call. = FALSE)
  }
  if (length(varnames) != length(vars)) {
    stop("`varnames` must have one entry per element of `vars`", call. = FALSE)
  }
  if (!is.numeric(colbreak) || length(colbreak) != 1 || is.na(colbreak) ||
      colbreak %% 1 != 0 || colbreak < 1 || colbreak > length(vars)) {
    stop("`colbreak` must be a whole number between 1 and length(vars)",
         call. = FALSE)
  }

  # Stack the cpct summaries of the variables at positions `idx`, leaving a
  # blank spacer row after every variable except the last one of the column
  build_column <- function(idx) {
    idx %>%
      map(function(x) cpct(df, vars[x], varnames[x], append = x < max(idx))) %>%
      bind_rows()
  }

  # Append `n` blank (all-NA) rows in a single step
  pad_rows <- function(tbl, n) {
    if (n > 0) {
      tbl <- add_row(tbl, Variable = rep(NA_character_, n))
    }
    tbl
  }

  # First column: variables 1..colbreak
  components <- build_column(seq_len(colbreak))

  # If there's a second column, build it from the remaining variables
  if (colbreak < length(vars)) {
    components2 <- build_column(seq(colbreak + 1, length(vars)))

    # Make sure the left and right columns have the same number of rows
    target <- max(nrow(components), nrow(components2))
    components <- pad_rows(components, target - nrow(components))
    components2 <- pad_rows(components2, target - nrow(components2))

    # A trailing space keeps the right-hand column names distinct from the
    # left-hand ones when the two halves are placed side by side
    names(components2) <- paste0(names(components2), " ")
    components <- bind_cols(components, components2)
  }

  # Render the assembled table as-is (no summary statistics, no row names)
  result <- stargazer(as.data.frame(components),
                      summary = FALSE, rownames = FALSE, ...)
  result
}
##### The code you might want to modify for your own purposes starts here
# Necessary libraries
library(tidyverse)
library(purrr)
library(stargazer)
# gss_cat for example data, forcats not required
library(forcats)
# Load the gss_cat example data
data("gss_cat")

# Columns to include, and the labels they should carry in the table
vars <- c("marital", "race", "partyid", "rincome")
varnames <- c("Marital Status", "Race", "Party ID", "Income")

# colbreak = 3 starts the second column after the third variable;
# every argument after that is a stargazer option
factortable(
  gss_cat,
  vars = vars,
  varnames = varnames,
  colbreak = 3,
  title = "Background Characteristics",
  type = "latex",
  out = "demographic_summary.tex"
)
# Note: categories are listed in factor-level order, so if we'd bothered to
# set the factor ordering beforehand, the rows in the table would be ordered correctly too
@csqr
Copy link

csqr commented Feb 18, 2020

This is great, just what I was looking for. Thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment