Last active
February 22, 2020 09:57
-
-
Save NickCH-K/0406b26f494c46bcef0b0abeacc7f1c8 to your computer and use it in GitHub Desktop.
Factor Variable Summary Table
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Necessary functions
# (Never worked with functions in R before? Just run these lines and the functions will be stored in memory,
# sort of like when you load a package.)
# Build the count/percent summary rows for a single categorical variable.
#
# df     - data frame containing the column
# var    - the column to summarize (factortable passes its name as a string)
# name   - label shown in the header row for this variable
# append - TRUE to add a trailing blank (all-NA) row, used as a spacer when
#          another variable follows in the same table column
#
# Returns a tibble with columns Variable, Count and Percent: one header row
# (the label, the number of nonmissing observations, '100%'), then one row per
# category with its count and its share of the nonmissing observations.
cpct <- function(df, var, name, append) {
  if (length(var) != 1) {
    stop("`var` must identify exactly one column", call. = FALSE)
  }
  if (is.character(var) && !(var %in% names(df))) {
    stop("Column '", var, "' not found in `df`", call. = FALSE)
  }

  # Pull the column out on its own rather than renaming it inside df, so a
  # column of df that is already called "Variable" cannot collide with it
  values <- df[[var]]
  # Limit to nonmissings and see how many nonmissings there are
  values <- values[!is.na(values)]
  N <- length(values)

  # Get number and percent (relative to nonmissings) in each category
  categories <- tibble(Variable = values) %>%
    group_by(Variable) %>%
    summarize(Count = n(), Percent = paste0(round(n() / N * 100, 1), '%')) %>%
    ungroup() %>%
    # and shift the categories over a few spaces with ...
    mutate(Variable = paste0('...', as.character(Variable)))

  # add "header" row for each variable
  out <- bind_rows(
    tibble(Variable = name, Count = N, Percent = '100%'),
    categories
  )

  # if there's going to be another variable after this one, add a blank space
  if (append) {
    out <- add_row(out)
  }
  out
}
# Build a one- or two-column summary table of categorical variables (counts
# and percentages per category) and render it with stargazer.
#
# df is a data frame
# vars is the column names (default: every column in df)
# varnames are what you want those column names referred to as (default: vars)
# colbreak is how many variables to go through before going to column 2
#   (default: all of them, i.e. no second column); must be a whole number
#   between 1 and length(vars)
# ... are stargazer arguments
#
# Returns the value of the stargazer() call.
factortable <- function(df, vars = NULL, varnames = NULL, colbreak = NULL, ...) {
  # Defaults - all variables in data, varnames are just the colnames in data,
  # no second column
  if (is.null(vars)) {
    vars <- names(df)
  }
  if (is.null(varnames)) {
    varnames <- vars
  }
  nvars <- length(vars)
  if (is.null(colbreak)) {
    colbreak <- nvars
  }

  # Fail early with a readable message instead of indexing vars/varnames
  # out of range further down
  if (nvars == 0) {
    stop("factortable() needs at least one variable to summarize",
         call. = FALSE)
  }
  if (length(varnames) != nvars) {
    stop("`varnames` must have the same length as `vars`", call. = FALSE)
  }
  if (length(colbreak) != 1 || !is.numeric(colbreak) || is.na(colbreak) ||
      colbreak %% 1 != 0 || colbreak < 1 || colbreak > nvars) {
    stop("`colbreak` must be a whole number between 1 and length(vars)",
         call. = FALSE)
  }

  # Create a cpct summary for each variable in idx and stick 'em all together;
  # every variable but the last one in the column gets a blank spacer row
  build_column <- function(idx) {
    last <- max(idx)
    idx %>%
      map(function(i) cpct(df, vars[i], varnames[i], append = i < last)) %>%
      bind_rows()
  }

  # Append n blank (all-NA) rows in a single step
  pad_rows <- function(tbl, n) {
    if (n <= 0) {
      return(tbl)
    }
    add_row(tbl, Variable = rep(NA_character_, n))
  }

  # First (left) column of the table
  components <- build_column(seq_len(colbreak))

  # If there's a second column, summarize those variables too
  if (colbreak < nvars) {
    components2 <- build_column(seq(colbreak + 1, nvars))

    # Make sure that the left and right columns have the same number of rows
    # (add blanks to whichever is shorter)
    target <- max(nrow(components), nrow(components2))
    components <- pad_rows(components, target - nrow(components))
    components2 <- pad_rows(components2, target - nrow(components2))

    # Then stick 'em together; the trailing space keeps the right-hand
    # column names distinct from the left-hand ones
    names(components2) <- paste0(names(components2), " ")
    components <- bind_cols(components, components2)
  }

  # Return a stargazer table
  stargazer(as.data.frame(components),
            summary = FALSE, rownames = FALSE, ...)
}
##### The code you might want to modify for your own purposes starts here

# Packages used by cpct() and factortable()
library(tidyverse)
library(purrr)
library(stargazer)
# forcats is loaded only for the gss_cat example data
library(forcats)
data("gss_cat")

# Columns to include
vars <- c("marital", "race", "partyid", "rincome")
# What those variables should be called in the table
varnames <- c("Marital Status", "Race", "Party ID", "Income")

factortable(
  df = gss_cat,
  vars = vars,
  varnames = varnames,
  # Start the second column after the third variable
  colbreak = 3,
  # Everything from here on is a stargazer option
  title = "Background Characteristics",
  type = "latex",
  out = "demographic_summary.tex"
)
# Note: had the factor levels been put in a meaningful order beforehand,
# the table rows would follow that order too
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is great, just what I was looking for. Thanks!