# NickCH-K · February 22, 2020 09:57 · csqr · Feb 18, 2020
# diff --git a/factortable.R b/factortable.R
 # Necessary functions
 # (never worked with functions in R before? Just run these lines, the functions will be stored in memory
 # sort of like if you load a package)

 # cpct: count-and-percent summary of one categorical column.
 #
 # df     : a data frame
 # var    : the column to summarize (a single column name)
 # name   : the label to print in the "header" row for this variable
 # append : TRUE to add a blank (all-NA) row at the end, used as a spacer
 #          when another variable follows in the same table column
 #
 # Returns a tibble with columns Variable, Count and Percent: one header row
 # (label, number of nonmissing values, '100%') followed by one row per
 # category, with the category name prefixed by '...'.
 cpct <- function(df, var, name, append) {
   if (is.character(var) && !(var %in% names(df))) {
     stop(paste0("Column '", var, "' not found in df"), call. = FALSE)
   }

   # Work on the single column only, so the result cannot collide with other
   # columns of df (e.g. one that is already called "Variable")
   values <- df[[var]]

   # Limit to nonmissings and see how many nonmissings there are
   values <- values[!is.na(values)]
   N <- length(values)

   categories <- tibble(Variable = values) %>%
     # Get number and percent (relative to nonmissings) in each category
     count(Variable, name = "Count") %>%
     mutate(
       Percent = paste0(round(Count / N * 100, 1), "%"),
       # and shift the categories over a few spaces with ...
       Variable = paste0("...", as.character(Variable))
     )

   # add "header" row for each variable
   out <- bind_rows(
     tibble(Variable = name, Count = N, Percent = "100%"),
     categories
   )

   # if there's going to be another variable after this one, add a blank space
   if (append) {
     out <- add_row(out)
   }

   out
 }

 # factortable: build a table of category counts and percentages for a set of
 # variables (one cpct() summary per variable, stacked) and hand it to stargazer.
 #
 # df       : a data frame
 # vars     : the column names to tabulate (default: every column of df)
 # varnames : what you want those columns referred to as, in the same order
 #            as vars (default: the column names themselves)
 # colbreak : how many variables to go through before going to column 2
 #            (default: length(vars), i.e. no second column)
 # ...      : stargazer arguments
 #
 # Returns whatever stargazer() returns for the assembled table.
 factortable <- function(df, vars = NULL, varnames = NULL, colbreak = NULL, ...) {
   # Defaults - all variables in data, varnames are just the colnames in data,
   # no second column
   if (is.null(vars)) {
     vars <- names(df)
   }
   if (is.null(varnames)) {
     varnames <- vars
   }
   if (is.null(colbreak)) {
     colbreak <- length(vars)
   }

   # Fail early with a clear message instead of indexing past the end of
   # vars/varnames further down
   if (length(vars) == 0) {
     stop("there are no variables to tabulate", call. = FALSE)
   }
   if (length(varnames) < length(vars)) {
     stop("varnames must supply a label for every element of vars", call. = FALSE)
   }
   if (!is.numeric(colbreak) || length(colbreak) != 1 || is.na(colbreak) ||
       colbreak %% 1 != 0 || colbreak < 1 || colbreak > length(vars)) {
     stop("colbreak must be a single whole number between 1 and length(vars)",
          call. = FALSE)
   }

   # In the first column, create a cpct summary for each variable and stick
   # 'em all together (blank spacer row after every variable but the last)
   components <- seq_len(colbreak) %>%
     map(function(x) cpct(df, vars[x], varnames[x], append = x < colbreak)) %>%
     bind_rows()

   # If there's a second column, get a cpct summary for each of those and
   # stick 'em together too
   if (colbreak < length(vars)) {
     components2 <- seq(colbreak + 1, length(vars)) %>%
       map(function(x) cpct(df, vars[x], varnames[x], append = x < length(vars))) %>%
       bind_rows()

     # Make sure that the left and right columns have the same number of rows:
     # pad the shorter one with blank (all-NA) rows in a single step
     rowdiff <- nrow(components2) - nrow(components)
     if (rowdiff < 0) {
       components2 <- add_row(components2, Variable = rep(NA_character_, -rowdiff))
     }
     if (rowdiff > 0) {
       components <- add_row(components, Variable = rep(NA_character_, rowdiff))
     }

     # Then stick 'em together; the right-hand column names get a trailing
     # space so they do not duplicate the left-hand ones
     names(components2) <- paste0(names(components2), " ")
     components <- bind_cols(components, components2)
   }

   # Return a stargazer table
   table_lines <- stargazer(as.data.frame(components),
                            summary = FALSE, rownames = FALSE, ...)
   table_lines
 }

 ##### The code you might want to modify for your own purposes starts here                                         
                                         
 # Packages the functions above rely on
 library(tidyverse)
 library(purrr)
 library(stargazer)

 # Example data only: gss_cat is loaded after attaching forcats
 # (forcats is not required by the functions themselves)
 library(forcats)
 data("gss_cat")

 # Columns to include, and the label each one should get in the table
 vars <- c("marital", "race", "partyid", "rincome")
 varnames <- c("Marital Status", "Race", "Party ID", "Income")

 # colbreak = 3 starts the second column after the third variable;
 # title, type and out are stargazer options
 factortable(
   gss_cat,
   vars = vars,
   varnames = varnames,
   colbreak = 3,
   title = "Background Characteristics",
   type = "latex",
   out = "demographic_summary.tex"
 )
 # Note: had we bothered to set the factor ordering beforehand,
 # the categories would appear in that order in the table too
	# Necessary functions
	# (never worked with functions in R before? Just run these lines, the functions will be stored in memory
	# sort of like if you load a package)

	# cpct: count-and-percent summary of one categorical column.
	#
	# df     : a data frame
	# var    : the column to summarize (a single column name)
	# name   : the label to print in the "header" row for this variable
	# append : TRUE to add a blank (all-NA) row at the end, used as a spacer
	#          when another variable follows in the same table column
	#
	# Returns a tibble with columns Variable, Count and Percent: one header row
	# (label, number of nonmissing values, '100%') followed by one row per
	# category, with the category name prefixed by '...'.
	cpct <- function(df, var, name, append) {
	  if (is.character(var) && !(var %in% names(df))) {
	    stop(paste0("Column '", var, "' not found in df"), call. = FALSE)
	  }

	  # Work on the single column only, so the result cannot collide with other
	  # columns of df (e.g. one that is already called "Variable")
	  values <- df[[var]]

	  # Limit to nonmissings and see how many nonmissings there are
	  values <- values[!is.na(values)]
	  N <- length(values)

	  categories <- tibble(Variable = values) %>%
	    # Get number and percent (relative to nonmissings) in each category
	    count(Variable, name = "Count") %>%
	    mutate(
	      Percent = paste0(round(Count / N * 100, 1), "%"),
	      # and shift the categories over a few spaces with ...
	      Variable = paste0("...", as.character(Variable))
	    )

	  # add "header" row for each variable
	  out <- bind_rows(
	    tibble(Variable = name, Count = N, Percent = "100%"),
	    categories
	  )

	  # if there's going to be another variable after this one, add a blank space
	  if (append) {
	    out <- add_row(out)
	  }

	  out
	}

	# factortable: build a table of category counts and percentages for a set of
	# variables (one cpct() summary per variable, stacked) and hand it to stargazer.
	#
	# df       : a data frame
	# vars     : the column names to tabulate (default: every column of df)
	# varnames : what you want those columns referred to as, in the same order
	#            as vars (default: the column names themselves)
	# colbreak : how many variables to go through before going to column 2
	#            (default: length(vars), i.e. no second column)
	# ...      : stargazer arguments
	#
	# Returns whatever stargazer() returns for the assembled table.
	factortable <- function(df, vars = NULL, varnames = NULL, colbreak = NULL, ...) {
	  # Defaults - all variables in data, varnames are just the colnames in data,
	  # no second column
	  if (is.null(vars)) {
	    vars <- names(df)
	  }
	  if (is.null(varnames)) {
	    varnames <- vars
	  }
	  if (is.null(colbreak)) {
	    colbreak <- length(vars)
	  }

	  # Fail early with a clear message instead of indexing past the end of
	  # vars/varnames further down
	  if (length(vars) == 0) {
	    stop("there are no variables to tabulate", call. = FALSE)
	  }
	  if (length(varnames) < length(vars)) {
	    stop("varnames must supply a label for every element of vars", call. = FALSE)
	  }
	  if (!is.numeric(colbreak) || length(colbreak) != 1 || is.na(colbreak) ||
	      colbreak %% 1 != 0 || colbreak < 1 || colbreak > length(vars)) {
	    stop("colbreak must be a single whole number between 1 and length(vars)",
	         call. = FALSE)
	  }

	  # In the first column, create a cpct summary for each variable and stick
	  # 'em all together (blank spacer row after every variable but the last)
	  components <- seq_len(colbreak) %>%
	    map(function(x) cpct(df, vars[x], varnames[x], append = x < colbreak)) %>%
	    bind_rows()

	  # If there's a second column, get a cpct summary for each of those and
	  # stick 'em together too
	  if (colbreak < length(vars)) {
	    components2 <- seq(colbreak + 1, length(vars)) %>%
	      map(function(x) cpct(df, vars[x], varnames[x], append = x < length(vars))) %>%
	      bind_rows()

	    # Make sure that the left and right columns have the same number of rows:
	    # pad the shorter one with blank (all-NA) rows in a single step
	    rowdiff <- nrow(components2) - nrow(components)
	    if (rowdiff < 0) {
	      components2 <- add_row(components2, Variable = rep(NA_character_, -rowdiff))
	    }
	    if (rowdiff > 0) {
	      components <- add_row(components, Variable = rep(NA_character_, rowdiff))
	    }

	    # Then stick 'em together; the right-hand column names get a trailing
	    # space so they do not duplicate the left-hand ones
	    names(components2) <- paste0(names(components2), " ")
	    components <- bind_cols(components, components2)
	  }

	  # Return a stargazer table
	  table_lines <- stargazer(as.data.frame(components),
	                           summary = FALSE, rownames = FALSE, ...)
	  table_lines
	}

	##### The code you might want to modify for your own purposes starts here

	# Packages the functions above rely on
	library(tidyverse)
	library(purrr)
	library(stargazer)

	# Example data only: gss_cat is loaded after attaching forcats
	# (forcats is not required by the functions themselves)
	library(forcats)
	data("gss_cat")

	# Columns to include, and the label each one should get in the table
	vars <- c("marital", "race", "partyid", "rincome")
	varnames <- c("Marital Status", "Race", "Party ID", "Income")

	# colbreak = 3 starts the second column after the third variable;
	# title, type and out are stargazer options
	factortable(
	  gss_cat,
	  vars = vars,
	  varnames = varnames,
	  colbreak = 3,
	  title = "Background Characteristics",
	  type = "latex",
	  out = "demographic_summary.tex"
	)
	# Note: had we bothered to set the factor ordering beforehand,
	# the categories would appear in that order in the table too