9.1 Bivariate Graphing

(\(C \rightarrow C\)) Prevalence of Nicotine Dependence (C) by Depression Status (C), among current, daily, young adult smokers (values stored in nesarc, created in Chapter 7)

# Proportion bar chart: one bar per MajorDepression category, stacked by
# TobaccoDependence and rescaled to a common height (position = "fill").
ggplot(nesarc, aes(MajorDepression, fill = TobaccoDependence)) +
  geom_bar(position = "fill") +
  scale_fill_manual(
    name = "Tobacco Addiction Status",
    values = c("green", "red")
  ) +
  # Show the legend keys in reverse order.
  guides(fill = guide_legend(reverse = TRUE)) +
  xlab("") +
  ylab("Fraction") +
  ggtitle(
    "Fraction of young adult daily smokers\nwith and without nicotine addiction\nby depression status"
  ) +
  theme_bw()

Mosaic Plots

# Mosaic plot of TobaccoDependence crossed with MajorDepression in nesarc,
# with shading switched on.
mosaic(
  ~ TobaccoDependence + MajorDepression,
  data = nesarc,
  shade = TRUE
)

(\(C \rightarrow Q\)) Boxplots and Violin plots

# Boxplots of Frustration.Score, one per Major group, on a black-and-white
# theme with a blank x-axis label.
ggplot(frustration, aes(Major, Frustration.Score)) +
  geom_boxplot() +
  xlab("") +
  ylab("Frustration Score") +
  ggtitle("Frustration Score by\n Academic Major") +
  theme_bw()

# Violin plots: same variables and labels as the boxplot version, but each
# Major group is drawn with geom_violin() instead of geom_boxplot().
ggplot(frustration, aes(Major, Frustration.Score)) +
  geom_violin() +
  xlab("") +
  ylab("Frustration Score") +
  ggtitle("Frustration Score by\n Academic Major") +
  theme_bw()

(\(Q \rightarrow Q\)) Scatter plots

# Scatter plot of gpa against sat from GRADES, with a linear-model smoother
# (method = "lm") layered on top of the points.
ggplot(GRADES, aes(sat, gpa)) +
  geom_point(colour = "lightblue") +
  geom_smooth(method = "lm") +
  xlab("SAT score") +
  ylab("First semester college Grade Point Average") +
  theme_bw()

(\(Q \rightarrow C\)) Scatter plot for logistic regression

# Numeric 0/1 copies of the default and student columns: "No" is coded 0 and
# any other non-missing value is coded 1.
Default$defaultN <- ifelse(Default$default != "No", 1, 0)
Default$studentN <- ifelse(Default$student != "No", 1, 0)

# Plot the 0/1 default indicator against balance and overlay a glm smoother
# fitted with the binomial family.
ggplot(Default, aes(balance, defaultN)) +
  # Semi-transparent points, since the responses sit only at 0 and 1.
  geom_point(alpha = 0.5) +
  stat_smooth(method = "glm", method.args = list(family = "binomial")) +
  ylab("Probability of Default") +
  theme_bw()