Skip to content

Instantly share code, notes, and snippets.

@vpnagraj
Last active August 12, 2024 22:08
Show Gist options
  • Save vpnagraj/ad5d9e90769300d3e446d45e5bef026e to your computer and use it in GitHub Desktop.
Save vpnagraj/ad5d9e90769300d3e446d45e5bef026e to your computer and use it in GitHub Desktop.
Example of reprex in R for asking *and* answering questions. The question.R script generates the content for question.md. The answer.R script generates the content for answer.md. The proof.R script uses the answer with data in example.csv.
library(tidyverse)

## example data: one row per respondent and one column per question/choice
## pair; the chosen option holds the answer text, every other cell is NA
dat <- tribble(
  ~individual, ~Q1_A, ~Q1_B, ~Q1_C, ~Q1_D, ~Q2_A,   ~Q2_B, ~Q2_C, ~Q2_D,
  "alice",     NA,    "cat", NA,    NA,    "tacos", NA,    NA,    NA,
  "bob",       "dog", NA,    NA,    NA,    NA,      NA,    NA,    "pizza"
)

## reshape every "question column" (name starting with Q) to long format:
##   - names_sep splits each column name at "_" into question and choice
##   - values_to stores the cell contents in an answer column
##   - values_drop_na removes the NA cells (i.e., the "non choices")
pivot_longer(
  dat,
  cols = starts_with("Q"),
  names_to = c("question", "choice"),
  names_sep = "_",
  names_prefix = "",
  values_to = "answer",
  values_drop_na = TRUE
)
#> # A tibble: 4 × 4
#>   individual question choice answer
#>   <chr>      <chr>    <chr>  <chr> 
#> 1 alice      Q1       B      cat   
#> 2 alice      Q2       A      tacos 
#> 3 bob        Q1       A      dog   
#> 4 bob        Q2       D      pizza

Created on 2024-08-12 with reprex v2.0.2

## answer.R: wraps the answer code in reprex::reprex() to generate the content
## of answer.md (the code followed by its "#>" output).
## NOTE: the comments inside the braces show up in the rendered answer, so
## notes about this script itself belong out here rather than inside the call.
reprex::reprex({
library(tidyverse)
## define the example tibble
dat <-
tribble(
~individual, ~Q1_A, ~Q1_B, ~Q1_C, ~Q1_D, ~Q2_A, ~Q2_B, ~Q2_C, ~Q2_D,
"alice", NA, "cat", NA, NA, "tacos", NA, NA, NA,
"bob", "dog", NA, NA, NA, NA, NA, NA, "pizza"
)
## first ... use pivot_longer to get all of the "question columns" (starting with Q)
## this will split the question columns into two columns (question and choice) by the "_"
## the values will be the answer
## the values_drop_na=TRUE will exclude the responses that have NA (i.e., the "non choices")
dat %>%
pivot_longer(cols = starts_with("Q"), names_prefix = "", names_sep = "_", names_to = c("question","choice"), values_to = c("answer"), values_drop_na = TRUE)
})
Name Income_A Income_B Income_C Income_D Age_A Age_B Age_C Age_D Alcohol_A Alcohol_B Alcohol_C Alcohol_D Exercise_A Exercise_B Exercise_C Exercise_D Politics_A Politics_B Politics_C Politics_D
Alice Jones 100-200k >65 1 drink daily >3 times weekly Republican
Bob Williams 50-100k 18-25 >3 drinks daily 2-3 times weekly Democrat
Carmen Smith >200k 0 40-64 None 1 time weekly Prefer not to say
library(tidyverse)

## read in the example dataset
dat <- read_csv("example.csv")

## take a look at how the data is formatted
dat

## tailor the approach in answer.R to the dataset in question:
## every survey column is named <question>_<choice>, so select them by their
## question prefix and split the names at the "_"
dat %>%
  pivot_longer(
    cols = c(
      starts_with("Inc"),
      starts_with("Age"),
      starts_with("Alc"),
      starts_with("Exe"),
      starts_with("Pol")
    ),
    names_to = c("question", "choice"),
    names_sep = "_",
    values_to = "answer",
    ## read_csv() guesses each column's type on its own, so a choice column
    ## holding only numbers (e.g. a bare 0) is read as numeric and cannot be
    ## stacked with the text columns; coerce everything to character first
    values_transform = list(answer = as.character),
    ## drop the NA cells (i.e., the "non choices")
    values_drop_na = TRUE
  )

I am trying to format answers to multiple-choice questions in a data frame.

I would like one row per respondent per question, with the question, choice, and answer in separate columns.

library(tidyverse)

## current shape: one row per respondent, one column per question/choice pair
tibble(
  individual = c("alice", "bob"),
  Q1_A = c(NA, "dog"),
  Q1_B = c("cat", NA),
  Q1_C = c(NA, NA),
  Q1_D = c(NA, NA),
  Q2_A = c("tacos", NA),
  Q2_B = c(NA, NA),
  Q2_C = c(NA, NA),
  Q2_D = c(NA, "pizza")
)
#> # A tibble: 2 × 9
#>   individual Q1_A  Q1_B  Q1_C  Q1_D  Q2_A  Q2_B  Q2_C  Q2_D 
#>   <chr>      <chr> <chr> <lgl> <lgl> <chr> <lgl> <lgl> <chr>
#> 1 alice      <NA>  cat   NA    NA    tacos NA    NA    <NA> 
#> 2 bob        dog   <NA>  NA    NA    <NA>  NA    NA    pizza


## desired shape: one row per respondent and question, with the selected
## choice and its answer text in their own columns
tibble(
  individual = c("alice", "alice", "bob", "bob"),
  question = c("Q1", "Q2", "Q1", "Q2"),
  choice = c("B", "A", "A", "D"),
  answer = c("cat", "tacos", "dog", "pizza")
)
#> # A tibble: 4 × 4
#>   individual question choice answer
#>   <chr>      <chr>    <chr>  <chr> 
#> 1 alice      Q1       B      cat   
#> 2 alice      Q2       A      tacos 
#> 3 bob        Q1       A      dog   
#> 4 bob        Q2       D      pizza

Created on 2024-08-12 with reprex v2.0.2

## question.R: wraps the question code in reprex::reprex() to generate the
## code-and-output content of question.md.
## The question being asked:
##   I am trying to format answers to multiple-choice questions in a data frame.
##   I would like one row per respondent per question, with the question,
##   choice, and answer in separate columns.
reprex::reprex({
library(tidyverse)
## here is what i have
tribble(
~individual, ~Q1_A, ~Q1_B, ~Q1_C, ~Q1_D, ~Q2_A, ~Q2_B, ~Q2_C, ~Q2_D,
"alice", NA, "cat", NA, NA, "tacos", NA, NA, NA,
"bob", "dog", NA, NA, NA, NA, NA, NA, "pizza"
)
## here is what i want
tribble(
~individual, ~question, ~choice, ~answer,
"alice", "Q1", "B", "cat",
"alice", "Q2", "A", "tacos",
"bob", "Q1", "A", "dog",
"bob", "Q2", "D", "pizza"
)
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment