Alaska Flights

Load the flights data for Alaska (carrier code AS):

# Packages used by the analysis below.
library(ggplot2)  # ggplot(), geom_point(), geom_smooth() for the scatter plot
library(dplyr)  # filter() and the %>% pipe
library(nycflights13)  # source of the flights data set
library(knitr)  # kable() for rendering the coefficient table
data(flights)  # explicitly load the flights data set into the workspace

# Keep only the Alaska flights (carrier code "AS") for which both the
# departure delay and the arrival delay are recorded (non-missing).
alaska_flights <- flights %>%
  filter(
    carrier == "AS",
    !is.na(dep_delay),
    !is.na(arr_delay)
  )

# How many flights remain after filtering
alaska_flights %>% nrow()
## [1] 709

Example 1: Departure vs Arrival Delay

Plot points and regression line

# Scatter plot of arrival delay against departure delay, overlaid with the
# least-squares line (se = FALSE hides the confidence band).
alaska_flights %>%
  ggplot(mapping = aes(x = dep_delay, y = arr_delay)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

Output regression info

# Simple linear regression of arrival delay on departure delay;
# printing the fit shows the call and the two estimated coefficients.
model <- lm(formula = arr_delay ~ dep_delay, data = alaska_flights)
print(model)
## 
## Call:
## lm(formula = arr_delay ~ dep_delay, data = alaska_flights)
## 
## Coefficients:
## (Intercept)    dep_delay  
##    -15.5989       0.9721

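The above output is hard to read and incomplete: it shows only the point estimates, with no standard errors, test statistics, p-values, or confidence intervals. Let’s use the tidy() function from the broom package to get all of these in a single table: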

library(broom)

# One row per model term, including confidence-interval bounds
# (conf.low / conf.high), rendered as a table rounded to 3 digits.
model_output <- model %>%
  tidy(conf.int = TRUE)
model_output %>%
  kable(digits = 3)
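|term        | estimate| std.error| statistic| p.value| conf.low| conf.high|
|:-----------|--------:|---------:|---------:|-------:|--------:|---------:|
|(Intercept) |  -15.599|     0.762|   -20.463|       0|  -17.096|   -14.102|
|dep_delay   |    0.972|     0.024|    40.733|       0|    0.925|     1.019|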