Skip to content

Instantly share code, notes, and snippets.

@lcolladotor
Last active September 18, 2024 15:47
Show Gist options
  • Save lcolladotor/3d68a76f79c271d84d066368b3f3c4a8 to your computer and use it in GitHub Desktop.
Save lcolladotor/3d68a76f79c271d84d066368b3f3c4a8 to your computer and use it in GitHub Desktop.
Searching for Sugar vs Sweetener vs Salt
library("tidyverse")
library("here")

## Load the chocolate data set stored under the project's data/ directory
chocolate <- readRDS(here("data", "chocolate.RDS"))

## Goal: flag the rows whose ingredient list contains the code "S" exactly
## (Sugar), as opposed to other codes that merely start with "S".
## Both solutions below follow the same four steps:
##   1. drop everything up to and including the last "-"
##   2. drop every space
##   3. split what is left on commas
##   4. check whether any of the resulting codes is exactly "S"

## Base R solution
## vapply() is used instead of sapply() so the result is always a logical
## vector (one value per row), even when the input has zero rows.
## Rows with a missing `ingredients` value yield NA, which is why the table
## below is asked to report NAs.
has_sugar <- vapply(
  strsplit(
    gsub(" ", "", gsub(".*-", "", chocolate$ingredients)),
    ","
  ),
  function(codes) any(codes == "S"),
  logical(1)
)
table(has_sugar, useNA = "ifany")

## Tidyverse way
## str_remove_all() strips every space, matching gsub(" ", "") above.
## str_trim() would only strip leading/trailing whitespace, so a code that
## follows ", " (comma + space) would keep its space and fail the "^S$" test.
chocolate %>%
  mutate(
    ingredients_no_dash = str_replace(ingredients, ".*-", ""),
    ingredients_no_space = str_remove_all(ingredients_no_dash, " "),
    ingredients_individual = str_split(ingredients_no_space, ","),
    sugar = purrr::map_lgl(
      ingredients_individual,
      \(codes) any(str_detect(codes, pattern = "^S$"))
    )
  ) %>%
  count(sugar)
## Rendered reprex: lines starting with "#>" are the output captured when the
## code above them was run.
library("tidyverse")
library("here")
#> here() starts at /Users/leocollado/Dropbox/Code/jhustatcomputing
## here() builds the file path relative to the project root reported above
chocolate <- readRDS(here("data", "chocolate.RDS"))

## Search for Sugar ("S") while differentiating it from "S*" and "Sa".
## Aka, search only for S followed by a comma ("S,") or
## S at the end of the string ("S$", since "$" anchors the end of the string).
## The "|" in the pattern means "or". Neither alternative can match "S*" or
## "Sa", because in those codes the S is followed by "*" or "a".
## The logical vector from str_detect() subsets the ingredient strings and
## table() counts how often each distinct matching string occurs.
table(chocolate$ingredients[
    stringr::str_detect(chocolate$ingredients, "S,|S$")
])
#> 
#>         2- B,S       3- B,S,C       3- B,S,L       3- B,S,V     4- B,S,C,L 
#>            718            999              8              3            286 
#>    4- B,S,C,Sa     4- B,S,C,V     4- B,S,V,L  5- B,S,C,L,Sa   5- B,S,C,V,L 
#>              5            141              5              1            184 
#>   5-B,S,C,V,Sa 6-B,S,C,V,L,Sa 
#>              6              4

## Search for "S*" (presumably the sweetener code, going by the gist title).
## "*" is a regex quantifier, so to match a literal "*" we have to escape it.
## The regex needs \* and, inside an R string literal, the backslash itself
## must be escaped, hence the two backslashes.
table(chocolate$ingredients[
    stringr::str_detect(chocolate$ingredients, "S\\*")
])
#> 
#>      2- B,S*    3- B,S*,C   3- B,S*,Sa  4- B,S*,C,L 4- B,S*,C,Sa  4- B,S*,C,V 
#>           31           12            1            2           20            7 
#>  4- B,S*,V,L 
#>            3

## Search only for Salt ("Sa"). No anchoring is needed here: the output below
## shows that "Sa" only ever appears as the salt code in these strings.
table(chocolate$ingredients[
    stringr::str_detect(chocolate$ingredients, "Sa")
])
#> 
#>     3- B,S*,Sa    4- B,S,C,Sa   4- B,S*,C,Sa  5- B,S,C,L,Sa   5-B,S,C,V,Sa 
#>              1              5             20              1              6 
#> 6-B,S,C,V,L,Sa 
#>              4

## Set the console width to 120 characters before printing the session
## details (presumably so the package table is not wrapped)
options(width = 120)
## Record the R version, platform and package versions used for this run
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.4.1 (2024-06-14)
#>  os       macOS Sonoma 14.5
#>  system   aarch64, darwin20
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       America/New_York
#>  date     2024-09-12
#>  pandoc   3.2 @ /opt/homebrew/bin/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
#>  package     * version date (UTC) lib source
#>  cli           3.6.3   2024-06-21 [1] CRAN (R 4.4.0)
#>  colorspace    2.1-1   2024-07-26 [1] CRAN (R 4.4.0)
#>  digest        0.6.36  2024-06-23 [1] CRAN (R 4.4.0)
#>  dplyr       * 1.1.4   2023-11-17 [1] CRAN (R 4.4.0)
#>  evaluate      0.24.0  2024-06-10 [1] CRAN (R 4.4.0)
#>  fansi         1.0.6   2023-12-08 [1] CRAN (R 4.4.0)
#>  fastmap       1.2.0   2024-05-15 [1] CRAN (R 4.4.0)
#>  forcats     * 1.0.0   2023-01-29 [1] CRAN (R 4.4.0)
#>  fs            1.6.4   2024-04-25 [1] CRAN (R 4.4.0)
#>  generics      0.1.3   2022-07-05 [1] CRAN (R 4.4.0)
#>  ggplot2     * 3.5.1   2024-04-23 [1] CRAN (R 4.4.0)
#>  glue          1.7.0   2024-01-09 [1] CRAN (R 4.4.0)
#>  gtable        0.3.5   2024-04-22 [1] CRAN (R 4.4.0)
#>  here        * 1.0.1   2020-12-13 [1] CRAN (R 4.4.0)
#>  hms           1.1.3   2023-03-21 [1] CRAN (R 4.4.0)
#>  htmltools     0.5.8.1 2024-04-04 [1] CRAN (R 4.4.0)
#>  knitr         1.48    2024-07-07 [1] CRAN (R 4.4.0)
#>  lifecycle     1.0.4   2023-11-07 [1] CRAN (R 4.4.0)
#>  lubridate   * 1.9.3   2023-09-27 [1] CRAN (R 4.4.0)
#>  magrittr      2.0.3   2022-03-30 [1] CRAN (R 4.4.0)
#>  munsell       0.5.1   2024-04-01 [1] CRAN (R 4.4.0)
#>  pillar        1.9.0   2023-03-22 [1] CRAN (R 4.4.0)
#>  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.4.0)
#>  purrr       * 1.0.2   2023-08-10 [1] CRAN (R 4.4.0)
#>  R6            2.5.1   2021-08-19 [1] CRAN (R 4.4.0)
#>  readr       * 2.1.5   2024-01-10 [1] CRAN (R 4.4.0)
#>  reprex        2.1.1   2024-07-06 [1] CRAN (R 4.4.0)
#>  rlang         1.1.4   2024-06-04 [1] CRAN (R 4.4.0)
#>  rmarkdown     2.27    2024-05-17 [1] CRAN (R 4.4.0)
#>  rprojroot     2.0.4   2023-11-05 [1] CRAN (R 4.4.0)
#>  rstudioapi    0.16.0  2024-03-24 [1] CRAN (R 4.4.0)
#>  scales        1.3.0   2023-11-28 [1] CRAN (R 4.4.0)
#>  sessioninfo   1.2.2   2021-12-06 [1] CRAN (R 4.4.0)
#>  stringi       1.8.4   2024-05-06 [1] CRAN (R 4.4.0)
#>  stringr     * 1.5.1   2023-11-14 [1] CRAN (R 4.4.0)
#>  tibble      * 3.2.1   2023-03-20 [1] CRAN (R 4.4.0)
#>  tidyr       * 1.3.1   2024-01-24 [1] CRAN (R 4.4.0)
#>  tidyselect    1.2.1   2024-03-11 [1] CRAN (R 4.4.0)
#>  tidyverse   * 2.0.0   2023-02-22 [1] CRAN (R 4.4.0)
#>  timechange    0.3.0   2024-01-18 [1] CRAN (R 4.4.0)
#>  tzdb          0.4.0   2023-05-12 [1] CRAN (R 4.4.0)
#>  utf8          1.2.4   2023-10-22 [1] CRAN (R 4.4.0)
#>  vctrs         0.6.5   2023-12-01 [1] CRAN (R 4.4.0)
#>  withr         3.0.1   2024-07-31 [1] CRAN (R 4.4.0)
#>  xfun          0.46    2024-07-18 [1] CRAN (R 4.4.0)
#>  yaml          2.3.10  2024-07-26 [1] CRAN (R 4.4.0)
#> 
#>  [1] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Created on 2024-09-12 with reprex v2.1.1

library("tidyverse")
library("here")

## Read the chocolate data from the project's data/ directory
chocolate <- readRDS(here("data", "chocolate.RDS"))

## Count how often each distinct ingredient string matching `pattern` occurs.
## The subset is passed to table() as an expression (not via a named
## variable) so the printed table carries no variable-name header.
tabulate_matches <- function(ingredients, pattern) {
  table(ingredients[stringr::str_detect(ingredients, pattern)])
}

## Sugar only, i.e. not S* and not Sa: an S that is either followed by a
## comma ("S,") or sits at the very end of the string ("S$", where "$"
## stands for the end of the string)
tabulate_matches(chocolate$ingredients, "S,|S$")

## S*: the * symbol has a special meaning in regular expressions, so it is
## "escaped" with two backslashes to match it literally
tabulate_matches(chocolate$ingredients, "S\\*")

## Salt only
tabulate_matches(chocolate$ingredients, "Sa")

## Wider console, then print the session details
options(width = 120)
sessioninfo::session_info()

## To render this script: copy everything above, then run
## reprex::reprex(wd = here::here())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment