library(ggplot2)
library(dplyr)
library(nycflights13)
library(knitr)
# Make the `flights` data set shipped with nycflights13 available.
data("flights", package = "nycflights13")

# Restrict to Alaska flights (carrier code "AS") and drop every row whose
# departure delay or arrival delay is missing, so that both variables are
# observed for each remaining flight.
alaska_flights <- filter(
  flights,
  carrier == "AS",
  !is.na(dep_delay),
  !is.na(arr_delay)
)

# How many flights remain after filtering?
alaska_flights %>% nrow()
## [1] 709
# Scatterplot of arrival delay against departure delay for the Alaska
# flights, with a fitted straight line (method "lm") drawn on top and the
# confidence ribbon switched off.
ggplot(alaska_flights, mapping = aes(dep_delay, arr_delay)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
# Fit a simple linear regression of arrival delay on departure delay.
model <- lm(formula = arr_delay ~ dep_delay, data = alaska_flights)

# Display the fitted call and the estimated coefficients.
print(model)
##
## Call:
## lm(formula = arr_delay ~ dep_delay, data = alaska_flights)
##
## Coefficients:
## (Intercept) dep_delay
## -15.5989 0.9721
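The default printout above is hard to read and incomplete: it shows only the
coefficient estimates, with no standard errors, test statistics, p-values, or
confidence intervals. Let’s use the tidy() function from the broom package to
get all of these in a single table: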
library(broom)

# Turn the fitted model into a one-row-per-term table of estimates,
# including confidence interval bounds for each coefficient.
model_output <- model %>%
  tidy(conf.int = TRUE)

# Render the table with values rounded to three decimal places.
model_output %>%
  kable(digits = 3)
term | estimate | std.error | statistic | p.value | conf.low | conf.high |
---|---|---|---|---|---|---|
(Intercept) | -15.599 | 0.762 | -20.463 | 0 | -17.096 | -14.102 |
dep_delay | 0.972 | 0.024 | 40.733 | 0 | 0.925 | 1.019 |