library(vinereg)
require(ggplot2)
require(dplyr)
require(purrr)
require(scales)
require(quantreg)
<- function(covs, preds) {
plot_marginal_effects cbind(covs, preds) %>%
::gather(alpha, prediction, -seq_len(NCOL(covs))) %>%
tidyr::mutate(prediction = as.numeric(prediction)) %>%
dplyr::gather(variable, value, -(alpha:prediction)) %>%
tidyr::mutate(value = as.numeric(value)) %>%
dplyrggplot(aes(value, prediction, color = alpha)) +
geom_point(alpha = 0.15) +
geom_smooth(span = 0.5, se = FALSE) +
facet_wrap(~ variable, scale = "free_x") +
theme(legend.position = "none") +
theme(plot.margin = unit(c(0, 0, 0, 0), "mm")) +
xlab("")
}
<- read.csv("day.csv")
bikedata 2] <- as.Date(bikedata[, 2])
bikedata[, head(bikedata)
## instant dteday season yr mnth holiday weekday workingday weathersit
## 1 1 2011-01-01 1 0 1 0 6 0 2
## 2 2 2011-01-02 1 0 1 0 0 0 2
## 3 3 2011-01-03 1 0 1 0 1 1 1
## 4 4 2011-01-04 1 0 1 0 2 1 1
## 5 5 2011-01-05 1 0 1 0 3 1 1
## 6 6 2011-01-06 1 0 1 0 4 1 1
## temp atemp hum windspeed casual registered cnt
## 1 0.344167 0.363625 0.805833 0.1604460 331 654 985
## 2 0.363478 0.353739 0.696087 0.2485390 131 670 801
## 3 0.196364 0.189405 0.437273 0.2483090 120 1229 1349
## 4 0.200000 0.212122 0.590435 0.1602960 108 1454 1562
## 5 0.226957 0.229270 0.436957 0.1869000 82 1518 1600
## 6 0.204348 0.233209 0.518261 0.0895652 88 1518 1606
<- bikedata %>%
bikedata rename(
temperature = atemp,
month = mnth,
weathersituation = weathersit,
humidity = hum,
count = cnt
)
See variable description on UCI web page.
<- bikedata %>%
bikedata mutate(
temperature = 66 * temperature + 16,
windspeed = 67 * windspeed,
humidity = 100 * humidity
)
ggplot(bikedata, aes(dteday, count)) +
geom_line() +
scale_x_date(labels = scales::date_format("%b %y")) +
xlab("date") +
ylab("rental count") +
stat_smooth(method = "lm", se = FALSE, linetype = "dashed") +
theme(plot.title = element_text(lineheight = 0.8, size = 20)) +
theme(text = element_text(size = 18))
<- lm(count ~ instant, data = bikedata)
lm_trend <- predict(lm_trend)
trend <- mutate(bikedata, count = count / trend)
bikedata ggplot(bikedata, aes(dteday, count)) +
geom_line() +
scale_x_date(labels = scales::date_format("%b %y")) +
xlab("date") +
ylab("detrended rental count") +
theme(plot.title = element_text(lineheight = 0.8, size = 20)) +
theme(text = element_text(size = 18))
<- bikedata %>%
bikedata select(-instant, -dteday, -yr) %>% # time indices
select(-casual, -registered) %>% # casual + registered = count
select(-holiday) %>% # we use 'workingday' instead
select(-temp) # we use 'temperature' (feeling temperature)
ordered
<- c("season", "month", "weekday", "workingday", "weathersituation")
disc_vars <- bikedata %>%
bikedata mutate(weekday = ifelse(weekday == 0, 7, weekday)) %>% # sun at end of week
::modify_at(disc_vars, as.ordered) purrr
<- vinereg(
fit ~ .,
count data = bikedata,
family = c("onepar", "tll"),
selcrit = "aic"
) fit
## D-vine regression model: count | temperature, humidity, windspeed, month, season, weathersituation, weekday, workingday
## nobs = 731, edf = 72.95, cll = 429.36, caic = -712.83, cbic = -377.68
summary(fit)
## var edf cll caic cbic p_value
## 1 count 9.59683 -198.076002 415.345665 459.43747179 NA
## 2 temperature 21.97990 415.784630 -787.609452 -686.62468419 1.123137e-161
## 3 humidity 17.93800 118.945709 -202.015413 -119.60081170 2.158275e-40
## 4 windspeed 1.00000 22.858947 -43.717894 -39.12348021 1.365695e-11
## 5 month 1.00000 12.485269 -22.970538 -18.37612437 5.821310e-07
## 6 season 1.00000 11.267339 -20.534677 -15.94026356 2.063842e-06
## 7 weathersituation 1.00000 12.110503 -22.221007 -17.62659308 8.589035e-07
## 8 weekday 18.43143 30.676323 -24.489791 60.19180978 1.638415e-06
## 9 workingday 1.00000 3.307899 -4.615798 -0.02138446 1.010780e-02
<- c(0.1, 0.5, 0.9)
alpha_vec <- fitted(fit, alpha_vec) pred
plot_marginal_effects(
covs = select(bikedata, temperature),
preds = pred
)
plot_marginal_effects(covs = select(bikedata, humidity), preds = pred) +
xlim(c(25, 100))
plot_marginal_effects(covs = select(bikedata, windspeed), preds = pred)
<- c("Jan","", "Mar", "", "May", "", "Jul", "", "Sep", "", "Nov", "")
month_labs plot_marginal_effects(covs = select(bikedata, month), preds = pred) +
scale_x_discrete(limits = 1:12, labels = month_labs)
plot_marginal_effects(covs = select(bikedata, weathersituation),
preds = pred) +
scale_x_discrete(limits = 1:3,labels = c("good", "medium", "bad"))
<- c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
weekday_labs plot_marginal_effects(covs = select(bikedata, weekday), preds = pred) +
scale_x_discrete(limits = 1:7, labels = weekday_labs)
plot_marginal_effects(covs = select(bikedata, workingday), preds = pred) +
scale_x_discrete(limits = 0:1, labels = c("no", "yes")) +
geom_smooth(method = "lm", se = FALSE) +
xlim(c(0, 1))
<- c("spring", "summer", "fall", "winter")
season_labs plot_marginal_effects(covs = select(bikedata, season), preds = pred) +
scale_x_discrete(limits = 1:4, labels = season_labs)