9.1 Bivariate Graphing
(C \(\rightarrow\) C) Prevalence of Nicotine Dependence (C) by Depression Status (C) among current, daily, young adult smokers
(values stored in nesarc, created in Chapter 7)
library(ggplot2)
library(PDS)
# Proportional stacked bars: one bar per MajorDepression level, filled by
# TobaccoDependence. position = "fill" rescales every bar to height 1, so the
# y axis reads as a fraction within each bar.
ggplot(nesarc, aes(MajorDepression, fill = TobaccoDependence)) +
  geom_bar(position = "fill") +
  scale_fill_manual(
    name = "Tobacco Addiction Status",
    values = c("green", "red")
  ) +
  # Show the legend keys in reversed order.
  guides(fill = guide_legend(reverse = TRUE)) +
  xlab("") +
  ylab("Fraction") +
  ggtitle("Fraction of young adult daily smokers\nwith and without nicotine addiction\nby depression status") +
  theme_bw()
Mosaic Plots
library(vcd)
# Mosaic plot of the TobaccoDependence x MajorDepression cross-classification
# in nesarc; shade = TRUE turns on vcd's tile shading.
mosaic(
  ~ TobaccoDependence + MajorDepression,
  data = nesarc,
  shade = TRUE
)
(\(C \rightarrow Q\)) Boxplots and Violin plots
# Boxplots of Frustration.Score, one box per Major.
ggplot(frustration, aes(Major, Frustration.Score)) +
  geom_boxplot() +
  xlab("") +
  ylab("Frustration Score") +
  ggtitle("Frustration Score by\n Academic Major") +
  theme_bw()
# Violin plots: same variables and labels as the boxplot version, but each
# Major is drawn as a violin (geom_violin) instead of a box.
ggplot(frustration, aes(Major, Frustration.Score)) +
  geom_violin() +
  xlab("") +
  ylab("Frustration Score") +
  ggtitle("Frustration Score by\n Academic Major") +
  theme_bw()
(Q \(\rightarrow\) Q) Scatter plots
library(PASWR2)
# Scatter plot of gpa against sat from GRADES, with a linear-model ("lm")
# smoother drawn on top of the points.
ggplot(GRADES, aes(sat, gpa)) +
  geom_point(colour = "lightblue") +
  geom_smooth(method = "lm") +
  xlab("SAT score") +
  ylab("First semester college Grade Point Average") +
  theme_bw()
(\(Q \rightarrow C\)) Scatter plot for logistic regression
library(ISLR)
library(ggplot2)
# Numeric 0/1 indicator columns derived from the default and student columns:
# "No" becomes 0, every other value becomes 1.
Default[["defaultN"]] <- ifelse(Default[["default"]] != "No", 1, 0)
Default[["studentN"]] <- ifelse(Default[["student"]] != "No", 1, 0)
# The 0/1 defaultN indicator against balance, drawn as semi-transparent
# points, with a binomial-family glm smoother layered over them.
ggplot(Default, aes(balance, defaultN)) +
  geom_point(alpha = 0.5) +
  stat_smooth(method = "glm", method.args = list(family = "binomial")) +
  ylab("Probability of Default") +
  theme_bw()