Skip to content

Instantly share code, notes, and snippets.

@NickCH-K
Last active December 27, 2022 21:48
Show Gist options
  • Save NickCH-K/a020d4c037c9dcb4b3d82c4389b45d82 to your computer and use it in GitHub Desktop.
Save NickCH-K/a020d4c037c9dcb4b3d82c4389b45d82 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(purrr)
predfunc = function(x, xdat) x[1] + x[2]*xdat
residsum = function(x, xdat, ydat, pwr) sum(abs((predfunc(x, xdat)-ydat))^pwr)
set.seed(1000)
exdat = bind_rows(
tibble(label = 'Outlier X',
x = rnorm(50)) %>%
mutate(y = x+rnorm(50, 0, .3)) %>%
mutate(x = case_when(row_number() >= 47 ~ x + 4,
TRUE ~ x)),
tibble(label = 'Positive Slope, Low Noise',
x = rnorm(50)) %>%
mutate(y = x+rnorm(50, 0, .3)),
tibble(label = 'Positive Slope, High Noise',
x = rnorm(50)) %>%
mutate(y = x + rnorm(50, 0, 2)),
tibble(label = 'Nonlinear Relationship',
x = rnorm(50,0,.75)) %>%
mutate(y = x + x^2 + rnorm(50, 0, .3)),
tibble(label = 'Outlier Y',
x = rnorm(50)) %>%
mutate(y = case_when(
row_number() < 47 ~ x + rnorm(50, 0, .3),
TRUE ~ x + rnorm(50,4,.3)
)),
tibble(label = 'Outlier X & Y',
x = c(rnorm(47),rnorm(3,4))) %>%
mutate(y = x + rnorm(50))
)
domin = function(lb, pw) {
print(paste('Working on',lb,'to the',pw,'power at',Sys.time()))
xdat = exdat %>%
filter(label == lb) %>%
pull(x)
ydat = exdat %>%
filter(label == lb) %>%
pull(y)
res = optim(c(0,0),residsum, xdat=xdat, ydat=ydat, pwr = pw)
tibble(label = lb,
power = as.character(pw),
intercept = res$par[1],
slope = res$par[2])
}
results = map_dfr(unique(exdat$label),
\(x) bind_rows(map_dfr(c(.5, 1, 2, 3, 4, 5, 100), \(y) domin(x, y)),
tibble(label = x,
power = 'True',
intercept = 0,
slope = 1)))
results = results %>%
mutate(power = factor(power,
levels = c('0.5','1','2','3','4','5','100','True')))
ggplot(exdat, aes(x = x, y = y, group = label)) +
geom_point() +
geom_line(aes(x = x, y = y),
data = tibble(x = (-200:200)/100,
label = 'Nonlinear Relationship') %>%
mutate(y = x+x^2),
color = 'red') +
geom_abline(aes(intercept = intercept, slope = slope, color = power),
data = results %>% filter(label != 'Nonlinear Relationship' | power != 'True')) +
facet_wrap(~factor(label,levels = c('Positive Slope, Low Noise',
'Positive Slope, High Noise',
'Nonlinear Relationship',
'Outlier X','Outlier Y','Outlier X & Y')), ncol = 2) +
scale_color_manual(values = c(paletteer_dynamic('cartography::blue.pal',7),'red')) +
theme_classic() +
theme(text = element_text(family = 'serif', size = 13),
axis.title.y = element_text(hjust = 1, angle = 0)) +
labs(color = 'Power',
caption = scales::label_wrap(100)('True model always: X is normal, e is normal, Y = X + e or Y = X + X^2 + e. Outliers add on a shift afterwards, and different graphs have different variances.'),
title = 'Why the sum of SQUARES?',
subtitle = scales::label_wrap(80)('Bivariate regression slopes minimizing the sum of residuals to different powers.'),
x = 'X',y = 'Y')
# Discrete color
ggplot(exdat, aes(x = x, y = y, group = label)) +
geom_point() +
geom_line(aes(x = x, y = y),
data = tibble(x = (-200:200)/100,
label = 'Nonlinear Relationship') %>%
mutate(y = x+x^2),
color = scales::hue_pal()(8)[8]) +
geom_abline(aes(intercept = intercept, slope = slope, color = power),
data = results %>% filter(label != 'Nonlinear Relationship' | power != 'True')) +
facet_wrap(~factor(label,levels = c('Positive Slope, Low Noise',
'Positive Slope, High Noise',
'Nonlinear Relationship',
'Outlier X','Outlier Y','Outlier X & Y')), ncol = 2) +
theme_classic() +
theme(text = element_text(family = 'serif', size = 13),
axis.title.y = element_text(hjust = 1, angle = 0)) +
labs(color = 'Power',
caption = scales::label_wrap(100)('True model always: X is normal, e is normal, Y = X + e or Y = X + X^2 + e. Outliers add on a shift afterwards, and different graphs have different variances.'),
title = 'Why the sum of SQUARES?',
subtitle = scales::label_wrap(80)('Bivariate regression slopes minimizing the sum of residuals to different powers.'),
x = 'X',y = 'Y')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment