## This is my population
## Size of every sample drawn below.
n <- 30
## Use the first column of the built-in iris data as the whole population.
pop <- iris[, 1]
hist(pop)
mean(pop)

## Simulate the sampling distribution of the mean: draw 10^4 samples of size n
## from pop (sample() draws WITHOUT replacement by default) and keep each mean.
sampling_dist <- replicate(10^4, {
  one_sample <- sample(pop, n)
  mean(one_sample)
})

## p is the total tail probability; u and l are the upper and lower quantile
## probabilities of the central (1 - p) interval, i.e. 0.975 and 0.025.
p <- 5 / 100
u <- 1 - p / 2
l <- 1 - u

## About 95% of samples of size n have a mean between the two red lines.
## Equivalently, for about 95% of samples an interval of this width centred
## on the sample mean captures the population mean.
hist(sampling_dist)
(q <- quantile(sampling_dist, c(l, u)))
abline(v = q[1], col = "red", lwd = 3)
abline(v = q[2], col = "red", lwd = 3)

## The centre of the sampling distribution should match the population mean.
mean(sampling_dist)
mean(pop)

## Spread of the sampling distribution versus theory.
var(sampling_dist)
## Textbook value for independent draws (infinite population / with
## replacement); kept for comparison.
var(pop) / n
## The samples above are drawn without replacement from a finite population of
## size N = length(pop), so the variance of the sample mean shrinks by the
## finite-population correction (1 - n / N). This is the value that
## var(sampling_dist) should actually be close to.
fpc <- 1 - n / length(pop)
fpc * var(pop) / n
## We got this sample
## Seed the RNG so the single observed sample, and the bootstrap resamples
## drawn from it, are the same on every run.
set.seed(1234)
samp <- sample(pop, n)

## Bootstrap: resample the observed sample with replacement (same size as the
## sample itself) 10^4 times and record the mean of every resample.
boot_sample_means <- vapply(
  seq_len(10^4),
  function(i) mean(sample(samp, size = length(samp), replace = TRUE)),
  numeric(1)
)

## Percentile interval: the central 95% of the bootstrap means, read as the
## range in which this sample places the population parameter.
hist(boot_sample_means)
(q <- quantile(boot_sample_means, c(l, u)))
## Mark both interval limits with a single call.
abline(v = q, col = "red", lwd = 3)
Let

- $X = \left\{x_1, x_2, x_3, \ldots \right\}$ be the population
- $m$ be some sample of $X$
-
$$
\begin{aligned}
\overline{x} &= \frac{1}{N} \sum^N_{i=1} \left[ \overline{x}_{i} \right] \\
&= \frac{1}{N} \sum^N_{i=1} \left[ \frac{1}{n} \sum^n_{j=1} \left[ m_{ij} \right] \right] \\
&= \mu
\end{aligned}
$$
Where:

- $\sigma$ is defined as above
- $n$ is the sample size
library(tidyverse)
library(Lock5Data)
## Load the nutrition study data and take a first look at its structure.
d <- Lock5Data::NutritionStudy
head(d)
str(d)
glimpse(d)

## Plot it ---------------------------------------------------------------------
## Tidyverse....................................................................
## Choose vitamin and fat
(d <- tibble(d) |>
  select(Vitamin, Fat) |>             # <- d[, c("Vitamin", "Fat")]
  mutate(Vitamin = factor(Vitamin)))  # <- factor(d$Vitamin)

## Layers are drawn in order, so the boxplot goes first and the raw points are
## jittered on top of it; drawn the other way round the boxes cover the points.
## outlier.shape = NA hides the boxplot's own outlier markers, which would
## otherwise duplicate observations already shown by geom_jitter().
d |>
  ggplot(aes(x = Vitamin, y = Fat, col = Vitamin)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.052, height = 0) +  # horizontal jitter only
  theme_classic() +
  labs(title = "Fat for different Vitamins")
## Base Plot ...................................................................
## Keep only the two plotted columns and treat Vitamin as a categorical factor.
d <- d[, c("Vitamin", "Fat")]
d$Vitamin <- factor(d$Vitamin)
## Fill colours for the boxes (palette indices 1 to 3); defined once and passed
## to boxplot() instead of repeating the literal.
cols <- 1:3
boxplot(Fat ~ Vitamin, data = d, col = cols, main = "Fat for Vitamins")
## Part 2 ----------------------------------------------------------------------
## Tidyverse....................................................................
## Keep only Fat and Calories; the outer parentheses print the assigned data
## before it is piped on to glimpse().
(d <- Lock5Data::NutritionStudy |>
  select(Fat, Calories)) |>
  glimpse()

## Colour is mapped inside geom_point() only. Mapping the continuous Calories
## colour globally makes stat_smooth() inherit an aesthetic it cannot keep,
## which recent ggplot2 versions report as a "dropped aesthetics" warning.
d |>
  ggplot(aes(x = Fat, y = Calories)) +
  geom_point(aes(col = Calories)) +
  stat_smooth(se = FALSE)  # trend line only, no confidence band
## Base Plot ...................................................................
## Model formula, Y ~ X: Calories is the response, Fat the predictor.
f <- Calories ~ Fat
## Fit the linear regression first; it does not depend on the plot.
mod <- lm(f, data = d)
## Scatter plot of the data: pch sets the point symbol, cex the point size.
plot(
  f,
  data = d,
  pch = 19,
  cex = 1,
  col = "royalblue",
  main = "Calories given Fat"
)
## Draw the fitted regression line over the points.
abline(mod, lwd = 2, lty = "dashed", col = "red")