oc
) in the EPIDURAL
data set from the PASWR2
package. Use a for
loop and the infer
pipeline to construct your confidence intervals.library(PASWR2)
library(tidyverse)
library(infer)
summary(EPIDURAL)
doctor kg cm ease
Dr. A:23 Min. : 48.00 Min. :152.0 Difficult :20
Dr. B:21 1st Qu.: 72.00 1st Qu.:160.0 Easy :57
Dr. C:21 Median : 84.00 Median :165.0 Impossible: 8
Dr. D:20 Mean : 86.08 Mean :165.4
3rd Qu.: 94.00 3rd Qu.:170.0
Max. :168.00 Max. :185.0
treatment oc complications
Hamstring Stretch :35 Min. : 0.000 Failure - person got dizzy: 1
Traditional Sitting:50 1st Qu.: 0.000 Failure - too many OCs : 2
Median : 1.000 None :76
Mean : 1.286 Paresthesia : 5
3rd Qu.: 2.000 Wet Tap : 1
Max. :10.000
NA's :1
EPIDURALC <- na.omit(EPIDURAL) # removing the NA value
summary(EPIDURALC)
doctor kg cm ease
Dr. A:23 Min. : 48.00 Min. :152.0 Difficult :20
Dr. B:21 1st Qu.: 72.00 1st Qu.:160.0 Easy :56
Dr. C:20 Median : 84.50 Median :165.0 Impossible: 8
Dr. D:20 Mean : 86.21 Mean :165.3
3rd Qu.: 94.00 3rd Qu.:170.0
Max. :168.00 Max. :185.0
treatment oc complications
Hamstring Stretch :35 Min. : 0.000 Failure - person got dizzy: 0
Traditional Sitting:49 1st Qu.: 0.000 Failure - too many OCs : 2
Median : 1.000 None :76
Mean : 1.286 Paresthesia : 5
3rd Qu.: 2.000 Wet Tap : 1
Max. :10.000
B <- 10^4
bsm <- numeric(B)
for(i in 1:B){
bss <- sample(EPIDURALC$oc, size = sum(!is.na(EPIDURALC$oc)), replace = TRUE)
bsm[i] <- mean(bss)
}
(quantile(bsm, probs = c(0.03, 0.97)) -> BSPCI)
3% 97%
0.9047619 1.7261905
# Infer pipeline
EPIDURALC %>%
specify(response = oc) %>%
generate(reps = B, type = "bootstrap") %>%
calculate(stat = "mean") -> bsmip
head(bsmip)
Response: oc (numeric)
# A tibble: 6 × 2
replicate stat
<int> <dbl>
1 1 1.18
2 2 1.10
3 3 1.21
4 4 1.24
5 5 1.07
6 6 1.32
(get_confidence_interval(bsmip, level = 0.94) -> CI1)
# A tibble: 1 × 2
lower_ci upper_ci
<dbl> <dbl>
1 0.893 1.73
# The following computes the same thing
(quantile(bsmip$stat, probs = c(0.03, 0.97)) -> CI2)
3% 97%
0.8928571 1.7261905
The 94% bootstrap percentile confidence interval for the mean number of obstructive contacts is: \(\text{CI}_{0.94}(\mu_{oc}) = [0.8928571, 1.7261905]\).
# Using base R with `bsm`
hist(bsm, xlab = substitute(paste(bar(X),"*")), main = "", col = "gray")
# Using ggplot2 with `bsm`
ggplot(data = data.frame(bsm = bsm), aes(x = bsm)) +
geom_histogram(binwidth = 0.1, fill = "gray", color = "black") +
theme_bw() +
labs(x = substitute(paste(bar(X),"*")))
# Using wrapper functions with bsmip (an infer object)
visualize(bsmip)
visualize(bsmip, fill = "purple") +
labs(x = substitute(paste(bar(X),"*")),
title = "Simulation-Based Bootstrap Distribution \n of the Mean Number of Obstructive Contacts") +
shade_confidence_interval(CI1, color = "red", fill = "lightblue") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))
fat
of males in the BODYFAT
data set from the PASWR2
package.#Creating a vector with the Male fat values
BODYFAT %>%
filter(sex == "M") %>%
select(fat) %>%
pull() -> fat
fat
[1] 9.5 7.8 17.8 27.4
# Another approach using base R
subset(BODYFAT, subset = sex == "M", select = fat, drop = TRUE) -> fat2
fat2
[1] 9.5 7.8 17.8 27.4
# One more approach
BODYFAT$fat[BODYFAT$sex=="M"] -> fat3
fat3
[1] 9.5 7.8 17.8 27.4
B <- 10^4
bsmf <- numeric(B)
for(i in 1:B){
bss <- sample(fat, size = sum(!is.na(fat)), replace = TRUE)
bsmf[i] <- mean(bss)
}
(quantile(bsmf, probs = c(0.01, 0.99)) -> BSPCIF)
1% 99%
8.225 25.000
# Using infer
BODYFAT %>%
filter(sex == "M") %>%
specify(response = fat) %>%
generate(reps = B, type = "bootstrap") %>%
calculate(stat = "mean") -> IFM
(get_confidence_interval(IFM, level = 0.98) -> CIF1)
# A tibble: 1 × 2
lower_ci upper_ci
<dbl> <dbl>
1 8.22 25
visualize(IFM) +
shade_confidence_interval(endpoints = CIF1)
EPIDURAL
data set from PASWR2
)EPIDURALC %>%
filter(treatment == "Hamstring Stretch") %>%
select(oc) %>%
pull() -> ochs # OCs for hamstring stretch
EPIDURALC %>%
filter(treatment == "Traditional Sitting") %>%
select(oc) %>%
pull() -> octs # OCs for traditional sitting
####
B <- 10^4
dm <- numeric(B)
for(i in 1:B){
bsshs <- sample(ochs, size = sum(!is.na(ochs)), replace = TRUE)
bssts <- sample(octs, size = sum(!is.na(octs)), replace = TRUE)
dm[i] <- mean(bsshs) - mean(bssts)
}
(quantile(dm, probs = c(0.05, 0.95)) -> CIDM)
5% 95%
-0.3510204 1.2285714
The 90% bootstrap percentile confidence interval for the difference in mean obstructive contacts for the hamstring stretch treatment minus the mean obstructive contacts for the traditional sitting position is: \(\text{CI}_{0.90}(\mu_{hs} - \mu _{ts})= [-0.3510204, 1.2285714]\).
EPIDURALC %>%
specify(oc ~ treatment) %>%
generate(reps = B, type = "bootstrap") %>%
calculate(stat = "diff in means", order = c("Hamstring Stretch", "Traditional Sitting")) -> BSDM
(get_confidence_interval(BSDM, level = 0.90) -> CIDM)
# A tibble: 1 × 2
lower_ci upper_ci
<dbl> <dbl>
1 -0.355 1.22
visualize(BSDM) +
shade_confidence_interval(endpoints = CIDM)
results <- EPIDURAL$complications=="None"
(phat <- mean(EPIDURAL$complications=="None"))
[1] 0.8941176
B <- 10^4
phatbs <- numeric(B)
for(i in 1:B){
bss <- sample(results, size = sum(!is.na(results)), replace = TRUE)
phatbs[i] <- mean(bss)
}
(quantile(phatbs, probs = c(0.025, 0.975)) -> CIP)
2.5% 97.5%
0.8235294 0.9529412
EPIDURAL %>%
mutate(no_complications = (complications == "None")) %>%
specify(response = no_complications, success = "TRUE") %>%
generate(reps = 10^4, type = "bootstrap") %>%
calculate(stat = "prop") -> PS
get_confidence_interval(PS, level = 0.95)
# A tibble: 1 × 2
lower_ci upper_ci
<dbl> <dbl>
1 0.824 0.953
(n <- sum(!is.na(results)))
[1] 85
phat*n
[1] 76
(1 - phat)*n
[1] 9
CI <- phat +c(-1, 1)*qnorm(.975)*sqrt(phat*(1-phat)/n)
CI
[1] 0.8287071 0.9595282
# OR
library(binom)
binom.confint(x = 76, n = 85, conf.level = 0.95, methods = "all")
method x n mean lower upper
1 agresti-coull 76 85 0.8941176 0.8087988 0.9453536
2 asymptotic 76 85 0.8941176 0.8287071 0.9595282
3 bayes 76 85 0.8895349 0.8226871 0.9511168
4 cloglog 76 85 0.8941176 0.8064044 0.9434493
5 exact 76 85 0.8941176 0.8084961 0.9504273
6 logit 76 85 0.8941176 0.8088548 0.9439819
7 probit 76 85 0.8941176 0.8135824 0.9458938
8 profile 76 85 0.8941176 0.8172229 0.9476443
9 lrt 76 85 0.8941176 0.8172260 0.9476675
10 prop.test 76 85 0.8941176 0.8038144 0.9474398
11 wilson 76 85 0.8941176 0.8108648 0.9432876
In Class Problem: Use the data BODYFAT
from PASWR2
to construct a 95% percentile bootstrap confidence interval for \(\mu_{female\_fat} - \mu_{male\_fat}\).