This is the example code for Dunkler D, Sauerbrei W, Heinze G (2016). Global, Parameterwise and Joint Shrinkage Factor Estimation. Journal of Statistical Software. 69(8), 1-19. http://dx.doi.org/10.18637/jss.v069.i08.
################################################################################
### R code for
### "Global, Parameterwise and Joint Shrinkage Factor Estimation"
### written by Daniela Dunkler, March 2016
################################################################################
## works with R 3.2.3 & shrink-package 1.2.1
## load the packages
library("shrink")
library("survival")
library("mfp")
library("aod") # for wald-test
##
## Attache Paket: 'aod'
## Das folgende Objekt ist maskiert 'package:survival':
##
## rats
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19042)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=C LC_CTYPE=German_Austria.1252
## [3] LC_MONETARY=German_Austria.1252 LC_NUMERIC=C
## [5] LC_TIME=German_Austria.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] aod_1.3.1 mfp_1.5.2.1 survival_3.2-11 shrink_1.2.2
## [5] knitr_1.37
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-152 matrixStats_0.61.0 lubridate_1.8.0
## [4] rms_6.2-0 RColorBrewer_1.1-2 tools_4.1.0
## [7] backports_1.4.1 bslib_0.3.1 utf8_1.2.2
## [10] R6_2.5.1 rpart_4.1-15 Hmisc_4.6-0
## [13] DBI_1.1.2 colorspace_2.0-2 nnet_7.3-16
## [16] withr_2.4.3 tidyselect_1.1.1 gridExtra_2.3
## [19] compiler_4.1.0 quantreg_5.86 htmlTable_2.4.0
## [22] SparseM_1.81 sandwich_3.0-1 sass_0.4.0
## [25] scales_1.1.1 checkmate_2.0.0 mvtnorm_1.1-3
## [28] polspline_1.1.19 stringr_1.4.0 digest_0.6.29
## [31] foreign_0.8-81 rmarkdown_2.11 base64enc_0.1-3
## [34] jpeg_0.1-9 pkgconfig_2.0.3 htmltools_0.5.2
## [37] parallelly_1.30.0 fastmap_1.1.0 htmlwidgets_1.5.4
## [40] rlang_0.4.12 rstudioapi_0.13 jquerylib_0.1.4
## [43] generics_0.1.1 zoo_1.8-9 jsonlite_1.7.3
## [46] dplyr_1.0.7 ModelMetrics_1.2.2.2 magrittr_2.0.1
## [49] Formula_1.2-4 Matrix_1.3-3 Rcpp_1.0.8
## [52] munsell_0.5.0 fansi_1.0.2 lifecycle_1.0.1
## [55] multcomp_1.4-18 stringi_1.7.6 pROC_1.18.0
## [58] yaml_2.2.1 MASS_7.3-54 plyr_1.8.6
## [61] recipes_0.1.17 grid_4.1.0 parallel_4.1.0
## [64] listenv_0.8.0 crayon_1.4.2 lattice_0.20-44
## [67] splines_4.1.0 pillar_1.6.4 future.apply_1.8.1
## [70] reshape2_1.4.4 codetools_0.2-18 stats4_4.1.0
## [73] glue_1.6.0 evaluate_0.14 latticeExtra_0.6-29
## [76] data.table_1.14.2 png_0.1-7 vctrs_0.3.8
## [79] foreach_1.5.1 MatrixModels_0.5-0 gtable_0.3.0
## [82] purrr_0.3.4 future_1.23.0 assertthat_0.2.1
## [85] ggplot2_3.3.5 xfun_0.29 gower_0.2.2
## [88] prodlim_2019.11.13 class_7.3-19 timeDate_3043.102
## [91] tibble_3.1.6 conquer_1.2.1 iterators_1.0.13
## [94] cluster_2.1.2 lava_1.6.10 globals_0.14.0
## [97] TH.data_1.1-0 ellipsis_0.3.2 caret_6.0-90
## [100] ipred_0.9-12
## Section 2.1: Deep Vein Thrombosis Study
## load data
data("deepvein")
# number of observations, events, median observation time, and names of
# variables
nrow(deepvein)
## [1] 929
$status2 <- abs(deepvein$status-1)
deepveinsurvfit(Surv(time, status2) ~ 1, data = deepvein)
## Call: survfit(formula = Surv(time, status2) ~ 1, data = deepvein)
##
## n events median 0.95LCL 0.95UCL
## 929.0 782.0 37.8 35.6 41.7
table(deepvein$status)
##
## 0 1
## 782 147
round(100*table(deepvein$status)/nrow(deepvein), 2)
##
## 0 1
## 84.18 15.82
sort(names(deepvein))
## [1] "age" "bmi" "durther" "fiimut" "fvleid" "loc"
## [7] "log2ddim" "pnr" "sex" "status" "status2" "time"
## Section 2.2: Breast Cancer Study
## load data
data("GBSG")
# number of observations, events, median observation time, and names of
# variables
nrow(GBSG)
## [1] 686
table(GBSG$cens)
##
## 0 1
## 387 299
round(100*table(GBSG$cens)/nrow(GBSG), 2)
##
## 0 1
## 56.41 43.59
$cens2 <- abs(GBSG$cens-1)
GBSG# median observation time is given in days here
survfit(Surv(rfst, cens2) ~ 1, data = GBSG)
## Call: survfit(formula = Surv(rfst, cens2) ~ 1, data = GBSG)
##
## n events median 0.95LCL 0.95UCL
## 686 387 1645 1570 1717
# median observation time in months
1645 / 30.5
## [1] 53.93443
sort(names(GBSG))
## [1] "age" "cens" "cens2" "esm" "htreat" "id"
## [7] "menostat" "posnodal" "prm" "rfst" "tumgrad" "tumsize"
## Section 5.1: Deep Vein Thrombosis Study
# set the reference level for all categorical variables
$sex <- relevel(deepvein$sex, ref = "female")
deepvein$fiimut <- relevel(deepvein$fiimut, ref = "absent")
deepvein$fvleid <- relevel(deepvein$fvleid, ref = "absent")
deepvein$loc <- relevel(deepvein$loc, ref = "PE")
deepvein# contrasts(deepvein$loc)
## fit full model, and compute shrinkage factors (jackknife estimates and dfbeta
## approximations)
<- coxph(Surv(time, status) ~ log2ddim + bmi + durther + age +
fitfull + loc + fiimut + fvleid, data = deepvein, x = TRUE)
sex summary(fitfull)
## Call:
## coxph(formula = Surv(time, status) ~ log2ddim + bmi + durther +
## age + sex + loc + fiimut + fvleid, data = deepvein, x = TRUE)
##
## n= 929, number of events= 147
##
## coef exp(coef) se(coef) z Pr(>|z|)
## log2ddim 0.219517 1.245475 0.085739 2.560 0.01046 *
## bmi 0.005865 1.005883 0.019051 0.308 0.75817
## durther 0.021881 1.022122 0.023681 0.924 0.35550
## age -0.003973 0.996035 0.006583 -0.603 0.54622
## sexmale 0.495927 1.642019 0.189718 2.614 0.00895 **
## locdistal -0.905095 0.404504 0.311078 -2.910 0.00362 **
## locproximal -0.179351 0.835813 0.180336 -0.995 0.31996
## fiimutpresent -0.162573 0.849954 0.390499 -0.416 0.67718
## fvleidpresent -0.108886 0.896833 0.194228 -0.561 0.57506
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## log2ddim 1.2455 0.8029 1.0528 1.4734
## bmi 1.0059 0.9942 0.9690 1.0442
## durther 1.0221 0.9784 0.9758 1.0707
## age 0.9960 1.0040 0.9833 1.0090
## sexmale 1.6420 0.6090 1.1321 2.3816
## locdistal 0.4045 2.4722 0.2199 0.7442
## locproximal 0.8358 1.1964 0.5870 1.1902
## fiimutpresent 0.8500 1.1765 0.3954 1.8272
## fvleidpresent 0.8968 1.1150 0.6129 1.3123
##
## Concordance= 0.61 (se = 0.026 )
## Likelihood ratio test= 26.2 on 9 df, p=0.002
## Wald test = 23.07 on 9 df, p=0.006
## Score (logrank) test = 23.9 on 9 df, p=0.004
## wald-test for categorical predictor "loc"
wald.test(Sigma = vcov(fitfull)[c("locdistal", "locproximal"),
c("locdistal", "locproximal")],
b = fitfull$coeff[c("locdistal", "locproximal")], Terms = 1:2)
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 8.6, df = 2, P(> X2) = 0.014
shrink(fitfull, type = "global", method = "jackknife")
## Shrinkage Factors (type=global, method=jackknife):
## [1] 0.6069174
##
## Shrunken Regression Coefficients:
## log2ddim bmi durther age sexmale
## 0.133228852 0.003559850 0.013279842 -0.002411042 0.300986429
## locdistal locproximal fiimutpresent fvleidpresent
## -0.549317699 -0.108851134 -0.098668180 -0.066084738
## apply backward elimination to full model*, and compute shrinkage factors to
## selected model
## * dummy variables of loc are jointly tested and selected
<- step(fitfull, direction = "backward") fitd
## Start: AIC=1784.06
## Surv(time, status) ~ log2ddim + bmi + durther + age + sex + loc +
## fiimut + fvleid
##
## Df AIC
## - bmi 1 1782.2
## - fiimut 1 1782.2
## - fvleid 1 1782.4
## - age 1 1782.4
## - durther 1 1782.9
## <none> 1784.1
## - log2ddim 1 1788.6
## - sex 1 1789.2
## - loc 2 1790.5
##
## Step: AIC=1782.15
## Surv(time, status) ~ log2ddim + durther + age + sex + loc + fiimut +
## fvleid
##
## Df AIC
## - fiimut 1 1780.3
## - fvleid 1 1780.5
## - age 1 1780.5
## - durther 1 1781.0
## <none> 1782.2
## - log2ddim 1 1786.6
## - sex 1 1787.2
## - loc 2 1788.6
##
## Step: AIC=1780.34
## Surv(time, status) ~ log2ddim + durther + age + sex + loc + fvleid
##
## Df AIC
## - fvleid 1 1778.7
## - age 1 1778.7
## - durther 1 1779.1
## <none> 1780.3
## - log2ddim 1 1784.9
## - sex 1 1785.3
## - loc 2 1786.7
##
## Step: AIC=1778.66
## Surv(time, status) ~ log2ddim + durther + age + sex + loc
##
## Df AIC
## - age 1 1777.0
## - durther 1 1777.4
## <none> 1778.7
## - log2ddim 1 1783.2
## - sex 1 1783.4
## - loc 2 1785.0
##
## Step: AIC=1777.01
## Surv(time, status) ~ log2ddim + durther + sex + loc
##
## Df AIC
## - durther 1 1775.8
## <none> 1777.0
## - log2ddim 1 1781.5
## - sex 1 1782.6
## - loc 2 1783.3
##
## Step: AIC=1775.77
## Surv(time, status) ~ log2ddim + sex + loc
##
## Df AIC
## <none> 1775.8
## - log2ddim 1 1780.3
## - sex 1 1781.2
## - loc 2 1782.8
print(fitd)
## Call:
## coxph(formula = Surv(time, status) ~ log2ddim + sex + loc, data = deepvein,
## x = TRUE)
##
## coef exp(coef) se(coef) z p
## log2ddim 0.21879 1.24457 0.08543 2.561 0.01043
## sexmale 0.49091 1.63380 0.18473 2.657 0.00787
## locdistal -0.92237 0.39758 0.31007 -2.975 0.00293
## locproximal -0.20505 0.81461 0.17867 -1.148 0.25112
##
## Likelihood ratio test=24.49 on 4 df, p=6.367e-05
## n= 929, number of events= 147
## wald-test for categorical predictor "loc"
wald.test(Sigma = vcov(fitd)[c("locdistal", "locproximal"),
c("locdistal", "locproximal")],
b = fitd$coeff[c("locdistal", "locproximal")], Terms = 1:2)
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 9.1, df = 2, P(> X2) = 0.01
shrink(fitd, type = "global", method = "jackknife")
## Shrinkage Factors (type=global, method=jackknife):
## [1] 0.8076362
##
## Shrunken Regression Coefficients:
## log2ddim sexmale locdistal locproximal
## 0.1767045 0.3964779 -0.7449390 -0.1656066
<- shrink(fitd, type = "parameterwise", method = "jackknife")
pwsf pwsf
## Shrinkage Factors (type=parameterwise, method=jackknife):
## log2ddim sexmale locdistal locproximal
## 0.7321036 0.8351074 0.8393993 0.1321006
##
## Shrunken Regression Coefficients:
## log2ddim sexmale locdistal locproximal
## 0.16017851 0.40996379 -0.77423621 -0.02708736
sqrt(diag(pwsf$ShrinkageFactorsVCOV))
## log2ddim sexmale locdistal locproximal
## 0.3703055 0.3699730 0.3268146 0.8696058
shrink(fitd, type = "parameterwise", method = "jackknife",
join = list(c("locdistal", "locproximal")))
## Shrinkage Factors (type=parameterwise with join option, method=jackknife):
## log2ddim sexmale locdistal locproximal
## 0.7806284 0.8363976 0.8055175 0.8055175
##
## Shrunken Regression Coefficients:
## log2ddim sexmale locdistal locproximal
## 0.1707954 0.4105972 -0.7429847 -0.1651721
## dummy variables of loc are separately tested and selected
## generate dummy variables, fit full model and then apply backward elimination
<- cbind(deepvein, model.matrix( ~ loc, data = deepvein)[, -1])
deepvein <- coxph(Surv(time, status) ~ log2ddim + bmi + durther + age +
fitfull2 + locdistal + locproximal + fiimut + fvleid,
sex data = deepvein, x = TRUE) # fitfull2 is identical to fitfull
<- step(fitfull2, direction = "backward") fitd2
## Start: AIC=1784.06
## Surv(time, status) ~ log2ddim + bmi + durther + age + sex + locdistal +
## locproximal + fiimut + fvleid
##
## Df AIC
## - bmi 1 1782.2
## - fiimut 1 1782.2
## - fvleid 1 1782.4
## - age 1 1782.4
## - durther 1 1782.9
## - locproximal 1 1783.1
## <none> 1784.1
## - log2ddim 1 1788.6
## - sex 1 1789.2
## - locdistal 1 1792.5
##
## Step: AIC=1782.15
## Surv(time, status) ~ log2ddim + durther + age + sex + locdistal +
## locproximal + fiimut + fvleid
##
## Df AIC
## - fiimut 1 1780.3
## - fvleid 1 1780.5
## - age 1 1780.5
## - durther 1 1781.0
## - locproximal 1 1781.2
## <none> 1782.2
## - log2ddim 1 1786.6
## - sex 1 1787.2
## - locdistal 1 1790.6
##
## Step: AIC=1780.34
## Surv(time, status) ~ log2ddim + durther + age + sex + locdistal +
## locproximal + fvleid
##
## Df AIC
## - fvleid 1 1778.7
## - age 1 1778.7
## - durther 1 1779.1
## - locproximal 1 1779.3
## <none> 1780.3
## - log2ddim 1 1784.9
## - sex 1 1785.3
## - locdistal 1 1788.7
##
## Step: AIC=1778.66
## Surv(time, status) ~ log2ddim + durther + age + sex + locdistal +
## locproximal
##
## Df AIC
## - age 1 1777.0
## - durther 1 1777.4
## - locproximal 1 1777.8
## <none> 1778.7
## - log2ddim 1 1783.2
## - sex 1 1783.4
## - locdistal 1 1787.0
##
## Step: AIC=1777.01
## Surv(time, status) ~ log2ddim + durther + sex + locdistal + locproximal
##
## Df AIC
## - durther 1 1775.8
## - locproximal 1 1776.2
## <none> 1777.0
## - log2ddim 1 1781.5
## - sex 1 1782.6
## - locdistal 1 1785.3
##
## Step: AIC=1775.77
## Surv(time, status) ~ log2ddim + sex + locdistal + locproximal
##
## Df AIC
## - locproximal 1 1775.1
## <none> 1775.8
## - log2ddim 1 1780.3
## - sex 1 1781.2
## - locdistal 1 1784.8
##
## Step: AIC=1775.1
## Surv(time, status) ~ log2ddim + sex + locdistal
##
## Df AIC
## <none> 1775.1
## - log2ddim 1 1778.9
## - sex 1 1780.7
## - locdistal 1 1782.8
summary(fitd2)
## Call:
## coxph(formula = Surv(time, status) ~ log2ddim + sex + locdistal,
## data = deepvein, x = TRUE)
##
## n= 929, number of events= 147
##
## coef exp(coef) se(coef) z Pr(>|z|)
## log2ddim 0.20392 1.22621 0.08483 2.404 0.0162 *
## sexmale 0.49535 1.64107 0.18496 2.678 0.0074 **
## locdistal -0.84053 0.43148 0.30277 -2.776 0.0055 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## log2ddim 1.2262 0.8155 1.0384 1.4480
## sexmale 1.6411 0.6094 1.1421 2.3581
## locdistal 0.4315 2.3176 0.2384 0.7811
##
## Concordance= 0.602 (se = 0.026 )
## Likelihood ratio test= 23.16 on 3 df, p=4e-05
## Wald test = 20.03 on 3 df, p=2e-04
## Score (logrank) test = 20.85 on 3 df, p=1e-04
shrink(fitd2, type = "global", method = "jackknife")
## Shrinkage Factors (type=global, method=jackknife):
## [1] 0.8439991
##
## Shrunken Regression Coefficients:
## log2ddim sexmale locdistal
## 0.1721124 0.4180761 -0.7094099
shrink(fitd2, type = "parameterwise", method = "jackknife")
## Shrinkage Factors (type=parameterwise, method=jackknife):
## log2ddim sexmale locdistal
## 0.7700839 0.8317218 0.8975722
##
## Shrunken Regression Coefficients:
## log2ddim sexmale locdistal
## 0.1570393 0.4119946 -0.7544400
## Table 2
<- c(shrink(fitd, type = "global", method = "jackknife")$ShrinkageFactors,
t2_jack $ShrinkageFactors,
pwsfshrink(fitd, type = "parameterwise", method = "jackknife",
join = list(c("locdistal", "locproximal")))$ShrinkageFactors,
system.time(shrink(fitd, type = "parameterwise", method = "jackknife"))[1])
<- c(shrink(fitd, type = "global", method = "dfbeta")$ShrinkageFactors,
t2_dfbeta shrink(fitd, type = "parameterwise", method = "dfbeta")$ShrinkageFactors,
shrink(fitd, type = "parameterwise", method = "dfbeta",
join = list(c("locdistal", "locproximal")))$ShrinkageFactors,
system.time(shrink(fitd, type = "parameterwise", method = "dfbeta"))[1])
<- cbind(Jackknife = t2_jack, DFBETA = t2_dfbeta)
t2
<- cbind(t2, round((t2[, 2] - t2[, 1]) / t2[, 1] * 100, 1))
Table2 1:2] <- round(Table2[, 1:2], 4)
Table2[, dimnames(Table2)[[2]][3] <- "Relative difference"
<- rbind(Table2[1,], rep(NA, 3), Table2[2:5,], rep(NA, 3), Table2[6:8,],
Table2 rep(NA, 3), Table2[10,])
dimnames(Table2)[[1]][c(1:2, 7, 10, 12)] <- c("Global shrinkage",
"Parameterwise shrinkage", "Joint shrinkage", "loc", "Computing time")
Table2
## Jackknife DFBETA Relative difference
## Global shrinkage 0.8076 0.8123 0.6
## Parameterwise shrinkage NA NA NA
## log2ddim 0.7321 0.7385 0.9
## sexmale 0.8351 0.8373 0.3
## locdistal 0.8394 0.8449 0.7
## locproximal 0.1321 0.1470 11.2
## Joint shrinkage NA NA NA
## log2ddim 0.7806 0.7864 0.7
## sexmale 0.8364 0.8386 0.3
## loc 0.8055 0.8111 0.7
## NA NA NA
## Computing time 0.7100 0.0100 -98.6
# max. difference
max(abs(Table2[1:10, 1] - Table2[1:10, 2]), na.rm=TRUE)
## [1] 0.0149
## Figure 1: Data simulated from deep vein thrombosis study: Comparison of Jackknife and
## DFBETA-approximated global shrinkage factors for selected and unselected
## models.
<- subset(deepvein, status == 0)
deepvein0 <- subset(deepvein, status == 1)
deepvein1
<- nrow(deepvein0)
n0 <- nrow(deepvein1)
n1
<- 0.2 # based on n1 / n0
ratio ## Note: Running this code is time-consuming, since 2 * B * S data sets are analyzed.
## size <- seq(from = 200, to = 2000, length.out = 19) # based on nrow(deepvein)
# B <- 100
## You may want to reduce B and length.out of size: e.g.
## size <- seq(from = 200, to = 2000, length.out = 4)
## B <- 3
#
# S <- length(size)
#
# shrinkGJ <- shrinkGD <- shrinkGJsel <- shrinkGDsel <- matrix(NA, nrow = B, ncol = S)
# set.seed(954681)
#
# for (s in 1:S) {
# cat("\n", s)
# for (b in 1:B) {
# cat(".")
# data <- rbind(deepvein0[sample(x = 1:n0, size = size[s]*(1-ratio), replace = TRUE),],
# deepvein1[sample(x = 1:n1, size = size[s]*ratio, replace = TRUE),])
#
# fitfull <- coxph(Surv(time, status) ~ log2ddim + bmi + durther + age + sex + loc +
# fiimut + fvleid, data = data, x = TRUE)
# fitsel <- step(fitfull, direction = "backward", trace = 0)
#
# if (!is.null(fitsel$coefficients)) { # no null model selected
# shrinkGJsel[b, s] <- shrink(fitsel, type = "global", method = "jackknife")$ShrinkageFactors
# shrinkGDsel[b, s] <- shrink(fitsel, type = "global", method = "dfbeta")$ShrinkageFactors
# }
#
#
# fit <- coxph(Surv(time, status) ~ log2ddim + sex + loc, data = data, x = TRUE)
#
# shrinkGJ[b, s] <- shrink(fit, type = "global", method = "jackknife")$ShrinkageFactors
# shrinkGD[b, s] <- shrink(fit, type = "global", method = "dfbeta")$ShrinkageFactors
# }
# }
## In some smaller data sets (and especially when shrinkage factors are estimated with the
## Jackknife method) there might be issues with convergence in individual data sets.
## However, coxph will issue a warning.
#
#
# shrinkGDselm <- apply(shrinkGDsel, 2, median)
# shrinkGJselm <- apply(shrinkGJsel, 2, median)
#
# cbind(n = size, Diff_J_D_sel = shrinkGJselm - shrinkGDselm)
#
# shrinkGDm <- apply(shrinkGD, 2, median)
# shrinkGJm <- apply(shrinkGJ, 2, median)
#
# cbind(n = size, Diff_J_D = shrinkGJm - shrinkGDm)
#
# xrange <- c(0.5, 1)
## xrange <- range(shrinkGDselm, shrinkGJselm, shrinkGDm, shrinkGJm)
## pdf("compJvsD.pdf", width = 6, height = 4)
# par(oma = c(0, 0, 0.5, 0.5), mar = c(3.5, 4, 0, 0))
# plot(range(size), xrange, type = "n", las = 1, xlab = "", ylab = "", xaxt = "n")
# axis(1, at = size)
# mtext(side = 1, text = "Sample size", line = 2.3)
# mtext(side = 2, text = "Global shrinkage factor", line = 2.8)
#
# points(size, shrinkGDselm, pch = 3, col = "darkolivegreen4", cex = 1.5)
# points(size, shrinkGJselm, pch = 1, col = "darkgoldenrod3", cex = 1.3)
# lines(size, shrinkGDselm, lty = 2, col = "darkolivegreen4", lwd = 2)
# lines(size, shrinkGJselm, lty = 3, col = "darkgoldenrod3", lwd = 2)
#
# points(size, shrinkGDm, pch = 3, col = "firebrick3", cex = 1.5)
# points(size, shrinkGJm, pch = 1, col = "dodgerblue3", cex = 1.3)
# lines(size, shrinkGDm, lty = 2, col = "firebrick3", lwd = 2)
# lines(size, shrinkGJm, lty = 3, col = "dodgerblue3", lwd = 2)
#
# legend(x = 1600, y = 0.62, legend = c("Jackknife", "DFBETA"), lwd = 2, title = "unselected",
# col = c("dodgerblue3", "firebrick3"), inset = 0.05, bty = "n", pch = c(1, 3),
# text.col = c("dodgerblue3", "firebrick3"), title.col = "black")
#
# legend(x = 1000, y = 0.62, legend = c("Jackknife", "DFBETA"), lwd = 2, title = "selected",
# col = c("darkgoldenrod3", "chartreuse4"), inset = 0.05, bty = "n", pch = c(1, 3),
# text.col = c("darkgoldenrod3", "chartreuse4"), title.col = "black")
## dev.off()
## Section 5.2: Breast Cancer Study
## define predictors as suggested by Sauerbrei (1999, Applied Statistics)
$enodes <- exp(-0.12*GBSG$posnodal)
GBSG
# create ordinal dummy-coded variable tumgrad for grades 1 to 3
contrasts(GBSG$tumgrad) <- matrix(c(0, 1, 1, 0, 0, 1), ncol = 2, byrow = FALSE,
dimnames = list(1:3, c("tumgrad1", "tumgrad2")))
contrasts(GBSG$tumgrad)
## tumgrad1 tumgrad2
## 1 0 0
## 2 1 0
## 3 1 1
# for nicer variable names use default dimnames
contrasts(GBSG$tumgrad) <- matrix(c(0, 1, 1, 0, 0, 1), ncol = 2, byrow = FALSE)
## # model selection (backward elimination) and estimation
<- mfp(Surv(rfst, cens) ~ fp(age) + fp(prm) + fp(esm) + fp(tumsize) +
fitg fp(enodes) + tumgrad + menostat + strata(htreat),
family = cox, data = GBSG, alpha = 0.05, select = 0.05)
print(fitg)
## Call:
## mfp(formula = Surv(rfst, cens) ~ fp(age) + fp(prm) + fp(esm) +
## fp(tumsize) + fp(enodes) + tumgrad + menostat + strata(htreat),
## data = GBSG, family = cox, alpha = 0.05, select = 0.05)
##
##
## Deviance table:
## Resid. Dev
## Null model 3198.026
## Linear model 3077.005
## Final model 3055.989
##
## Fractional polynomials:
## df.initial select alpha df.final power1 power2
## enodes 4 0.05 0.05 1 1 .
## prm 4 0.05 0.05 2 0.5 .
## tumgrad1 1 0.05 0.05 1 1 .
## tumgrad2 1 0.05 0.05 0 . .
## tumsize 4 0.05 0.05 0 . .
## menostat2 1 0.05 0.05 0 . .
## age 4 0.05 0.05 4 -2 -1
## esm 4 0.05 0.05 0 . .
##
##
## Transformations of covariates:
## formula
## age I((age/100)^-2)+I((age/100)^-1)
## prm I(((prm+1)/100)^0.5)
## esm <NA>
## tumsize <NA>
## enodes I(enodes^1)
## tumgrad tumgrad
## menostat <NA>
##
## Re-Scaling:
## Non-positive values in some of the covariates. No re-scaling was performed.
##
## coef exp(coef) se(coef) z p
## enodes.1 -1.9782 0.13832 0.2272 -8.706 0.00e+00
## prm.1 -0.5717 0.56456 0.1109 -5.154 2.55e-07
## tumgrad1.1 0.5137 1.67141 0.2495 2.059 3.95e-02
## age.1 0.6060 1.83303 0.1188 5.101 3.37e-07
## age.2 -2.6539 0.07037 0.5902 -4.496 6.91e-06
##
## Likelihood ratio test=142 on 5 df, p=0 n= 686
## Table 3
## (p-values unconditional on the selected degree and power for prm, age, and
## enodes = fitg$pvalues)
<- c("age.1", "age.2", "prm.1", "enodes.1", "tumgrad1.1")
varorder
<- c(NA, round(fitg$coefficients, 3)[varorder])
t3_beta_j <- c(fitg$df.initial[7, ], NA, NA, fitg$df.initial[c(2, 1, 3), ])
t3_df <- c(round(fitg$pvalues[7, "p.null"], 3), NA, NA,
t3_p round(fitg$pvalues[c(2, 1, 3), "p.null"], 3))
<- shrink(fitg, type = "parameterwise", method = "jackknife")
t3_pwsf0 <- round(c(NA, t3_pwsf0$ShrinkageFactors[varorder]), 3)
t3_pwsf <- round(c(NA, sqrt(diag(t3_pwsf0$ShrinkageFactorsVCOV))[varorder]), 3)
t3_pwsfse <- shrink(fitg, type = "parameterwise", method = "jackknife",
t3_jointsf0 join=list(c("age.1", "age.2")))
<- round(c(t3_jointsf0$ShrinkageFactors[varorder[1]], NA, NA,
t3_jointsf $ShrinkageFactors[varorder[-c(1:2)]]), 3)
t3_jointsf0<- round(c(sqrt(diag(t3_jointsf0$ShrinkageFactorsVCOV))["join.age.1"],
t3_jointsfse NA, NA,
sqrt(diag(t3_jointsf0$ShrinkageFactorsVCOV))[varorder[-c(1:2)]]), 3)
<- cbind("beta_j" = t3_beta_j, "df" = t3_df, "p value" = t3_p,
Table3 "PWSF" = t3_pwsf, "PWSF SE" = t3_pwsfse,
"Joint shrinkage" = t3_jointsf,
"Joint shrinkage SE" = t3_jointsfse)
dimnames(Table3)[[1]][1] <- "age"
Table3
## beta_j df p value PWSF PWSF SE Joint shrinkage Joint shrinkage SE
## age NA 4 0.001 NA NA 0.876 0.188
## age.1 0.606 NA NA 0.811 0.236 NA NA
## age.2 -2.654 NA NA 0.782 0.277 NA NA
## prm.1 -0.572 4 0.000 0.978 0.189 0.982 0.189
## enodes.1 -1.978 4 0.000 0.987 0.116 0.986 0.116
## tumgrad1.1 0.514 1 0.028 0.811 0.453 0.809 0.452
## compute shrinkage factors
<- shrink(fitg, type = "global", method = "jackknife")
globalsf globalsf
## Shrinkage Factors (type=global, method=jackknife):
## [1] 0.9526639
##
## Shrunken Regression Coefficients:
## enodes.1 prm.1 tumgrad1.1 age.1 age.2
## -1.8845360 -0.5446421 0.4893540 0.5772861 -2.5282947
sqrt(globalsf$ShrinkageFactorsVCOV)
## global
## global 0.08062613
<- shrink(fitg, type = "parameterwise", method = "jackknife")
pwsf pwsf
## Shrinkage Factors (type=parameterwise, method=jackknife):
## enodes.1 prm.1 tumgrad1.1 age.1 age.2
## 0.9874769 0.9777288 0.8108045 0.8108243 0.7822610
##
## Shrunken Regression Coefficients:
## enodes.1 prm.1 tumgrad1.1 age.1 age.2
## -1.9534020 -0.5589719 0.4164852 0.4913355 -2.0760587
round(sqrt(diag(pwsf$ShrinkageFactorsVCOV)), 3)
## enodes.1 prm.1 tumgrad1.1 age.1 age.2
## 0.116 0.189 0.453 0.236 0.277
round(cov2cor(pwsf$ShrinkageFactorsVCOV), 3)
## enodes.1 prm.1 tumgrad1.1 age.1 age.2
## enodes.1 1.000 -0.055 -0.078 0.030 0.021
## prm.1 -0.055 1.000 -0.200 0.026 0.032
## tumgrad1.1 -0.078 -0.200 1.000 -0.040 -0.035
## age.1 0.030 0.026 -0.040 1.000 0.984
## age.2 0.021 0.032 -0.035 0.984 1.000
<- shrink(fitg, type = "parameterwise", method = "jackknife",
jointsf join=list(c("age.1", "age.2")))
jointsf
## Shrinkage Factors (type=parameterwise with join option, method=jackknife):
## enodes.1 prm.1 tumgrad1.1 age.1 age.2
## 0.9862779 0.9816686 0.8094979 0.8763299 0.8763299
##
## Shrunken Regression Coefficients:
## enodes.1 prm.1 tumgrad1.1 age.1 age.2
## -1.9510302 -0.5612242 0.4158141 0.5310300 -2.3257103
round(sqrt(diag(jointsf$ShrinkageFactorsVCOV)), 3)
## join.age.1 enodes.1 prm.1 tumgrad1.1
## 0.188 0.116 0.189 0.452
## Figure 2: selected model: Log hazard relative to 50 years
# refit model fitg explicitly including the two dummy variables of
# tumgrad
<- cbind(GBSG, model.matrix(~tumgrad, data = GBSG)[, -1])
GBSG
<- mfp(Surv(rfst, cens) ~ fp(age) + fp(prm) + fp(esm) + fp(tumsize) +
fitg fp(enodes) + tumgrad1 + tumgrad2 + menostat + strata(htreat),
family = cox, data = GBSG, alpha = 0.05, select = 0.05)
<- shrink(fitg, type = "global", method = "jackknife")
globalsf <- shrink(fitg, type = "parameterwise", method = "jackknife")
pwsf <- shrink(fitg, type = "parameterwise", method = "jackknife",
jointsf join=list(c("age.1", "age.2")))
# define new data for which prediction is requested
# newdatref is the new data set for the reference observation
<- 30:75
age <- data.frame(age = age, enodes = 1, prm = 1, tumgrad1 = 0,
newdat htreat = factor(1))
<- data.frame(age = 50, enodes = 1, prm = 1, tumgrad1 = 0,
newdatref htreat = factor(1))
<- seq(from = min(age), to = max(age), by = 5)
xaxis
# pdf("plotgbsg.pdf", width = 6, height = 6)
par(fig = c(0, 1, 0, 0.3), new = FALSE, oma = c(0, 0, 0.5, 0.5),
mar = c(3.5, 4, 0, 0))
hist(GBSG$age[(GBSG$age >= min(age)) & (GBSG$age <= max(age))],
breaks = seq(from = min(age), to = max(age), by = 2.5), main = "",
xlim = range(age), xaxs = "r", yaxs = "r", xlab = "", ylab = "",
axes = FALSE, col = "gray88")
axis(side = 1, at = xaxis)
axis(side = 2, at = seq(from = 0, to = 60, by = 20), las = 2)
mtext(side = 1, text = "Age", line = 2.3)
mtext(side = 2, text = "Frequency", line = 2.8)
box()
par(fig = c(0,1,0.3,1), new = TRUE, oma = c(0, 0, 0.5, 0.5),
mar = c(1, 4, 0, 0))
plot(range(age), c(-0.05, 0.8), xlab = "", ylab = "", type = "n", yaxs = "i",
xaxt = "n", yaxt = "n")
axis(side = 1, at = xaxis, labels = rep("", times = length(xaxis)))
axis(side = 2, at = seq(from = 0, to = 0.7, by = 0.1), las = 2,
labels = seq(from = 0, to = 0.7, by = 0.1))
mtext(side = 2, text = "Log hazard relative to 50 years", line = 2.8)
lines(x = age, y = predict(fitg, newdata = newdat, type = "lp") -
predict(fitg, newdata = newdatref, type = "lp"), lty = 1, col = "gray25",
lwd = 2)
lines(x = age, y = predict(globalsf, newdata = newdat, type = "lp") -
predict(globalsf, newdata = newdatref, type = "lp"), lty = 5,
col = "forestgreen", lwd = 2)
lines(x = age, y = predict(pwsf, newdata = newdat, type="lp") -
predict(pwsf, newdata = newdatref, type = "lp"), lty = 2,
col = "dodgerblue3", lwd = 2)
lines(x = age, y = predict(jointsf, newdata = newdat, type = "lp") -
predict(jointsf, newdata = newdatref, type = "lp"), lty = 4,
col = "firebrick3", lwd = 2)
legend("topright", inset = 0.02, bty = "n", lty = c(1, 5, 4, 2),
title = "SHRINKAGE",
legend = c("No", "Global", "Joint", "Parameterwise"),
col = c("gray25", "forestgreen", "firebrick3", "dodgerblue3"), lwd = 2,
text.col = c("gray25", "forestgreen", "firebrick3", "dodgerblue3"))
# dev.off()