Exercise 28. Flexible parametric survival models in R

library(biostat3)  
library(rstpm2)  # for the flexible parametric model
library(dplyr)   # for data manipulation
data(melanoma)

## Extract subset and create 0/1 outcome variables
melanoma0 <- melanoma %>% filter(stage=="Localised") %>%
             mutate(event = ifelse(status=="Dead: cancer" & surv_mm<120, 1, 0),
                    time = pmin(120, surv_mm)/12,
                    agegrp1 = (agegrp=="0-44")+0,  # used by time-dependent effect
                    agegrp2 = (agegrp=="45-59")+0, # used by time-dependent effect
                    agegrp3 = (agegrp=="60-74")+0, # used by time-dependent effect
                    agegrp4 = (agegrp=="75+")+0)   # used by time-dependent effect

(a)

The stpm2 output can be seen below.

## (a) Flexible parametric model with df=4

fpma <- stpm2(Surv(time,event) ~ year8594, data=melanoma0, df=4)
exp(cbind(IRR=rstpm2::coef(fpma),rstpm2::confint(fpma)))
## Profiling...
## Warning in .local(object, parm, level, ...): non-monotonic spline fit
## to profile (nsx(log(time), df = 4)2): reverting from spline to linear
## approximation
##                                  IRR        2.5 %       97.5 %
## (Intercept)             1.699331e-04 1.593731e-04 1.807172e-04
## year8594Diagnosed 85-94 7.840078e-01 7.130851e-01 8.594660e-01
## nsx(log(time), df = 4)1 7.258686e+02 6.687423e+02 7.823594e+02
## nsx(log(time), df = 4)2 2.868368e+02 2.689145e+02 2.907032e+02
## nsx(log(time), df = 4)3 9.084766e+04 7.973384e+04 1.036427e+05
## nsx(log(time), df = 4)4 1.384754e+02 1.311110e+02 1.467950e+02
summary(fpma)
## Maximum likelihood estimation
## 
## Call:
## mle2(minuslogl = negll, start = coef, eval.only = TRUE, vecpar = TRUE, 
##     gr = function (beta) 
##     {
##         localargs <- args
##         localargs$init <- beta
##         localargs$return_type <- "gradient"
##         return(.Call("model_output", localargs, PACKAGE = "rstpm2"))
##     }, control = list(parscale = c(1, 1, 1, 1, 1, 1), maxit = 300), 
##     lower = -Inf, upper = Inf)
## 
## Coefficients:
##                         Estimate Std. Error  z value     Pr(z)    
## (Intercept)             -8.68011    0.58363 -14.8727 < 2.2e-16 ***
## year8594Diagnosed 85-94 -0.24334    0.06584  -3.6959 0.0002191 ***
## nsx(log(time), df = 4)1  6.58737    0.57688  11.4190 < 2.2e-16 ***
## nsx(log(time), df = 4)2  5.65891    0.38070  14.8645 < 2.2e-16 ***
## nsx(log(time), df = 4)3 11.41694    1.14341   9.9850 < 2.2e-16 ***
## nsx(log(time), df = 4)4  4.93069    0.24526  20.1040 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## -2 log L: 8440.488

The hazard ratio, 95% confidence interval and statistical significance are very similar to the Cox model.

cox <- coxph(Surv(time, event) ~ year8594,
             data=melanoma0) # year8594 is a categorical variable
summary(cox)
## Call:
## coxph(formula = Surv(time, event) ~ year8594, data = melanoma0)
## 
##   n= 5318, number of events= 960 
## 
##                             coef exp(coef) se(coef)      z Pr(>|z|)    
## year8594Diagnosed 85-94 -0.25297   0.77649  0.06579 -3.845 0.000121 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                         exp(coef) exp(-coef) lower .95 upper .95
## year8594Diagnosed 85-94    0.7765      1.288    0.6825    0.8834
## 
## Concordance= 0.533  (se = 0.008 )
## Rsquare= 0.003   (max possible= 0.949 )
## Likelihood ratio test= 14.83  on 1 df,   p=0.0001177
## Wald test            = 14.78  on 1 df,   p=0.0001206
## Score (logrank) test = 14.86  on 1 df,   p=0.000116

(b)

The predicted survival and hazard functions are shown below.

## (b) Prediction and plots of survival and hazard by calendar period
years <- levels(melanoma0$year8594)
alegend <- function() legend("topright", legend=years, col=1:2, lty=1, bty="n")

plot(fpma,newdata=data.frame(year8594=years[1]),
     xlab="Time since diagnosis (years)")
lines(fpma,newdata=data.frame(year8594=years[1]), ci=FALSE) # repeated
alegend()

plot(fpma,newdata=data.frame(year8594=years[1]), type="haz",
     xlab="Time since diagnosis (years)", ylab="Hazard")
lines(fpma,newdata=data.frame(year8594=years[2]), type="haz", col=2)
alegend()

(c)

There is a constant difference as the predictions are from a proportional hazards model and a multiplicative effect becomes additive on the log scale.

## (c) hazards on log scale, adding log="y"
plot(fpma,newdata=data.frame(year8594=years[1]), type="haz",
     xlab="Time since diagnosis (years)", log="y", ci=FALSE,
     ylab="Hazard (log scale)")
lines(fpma,newdata=data.frame(year8594=years[2]), type="haz", col=2)
alegend()

(d)

The log hazard ratios (and hence the hazard ratios) from 2 df and up are similar and for 3 df they are very similar. The main difference is for 1 df, which is equivalent to a Weibull model. The Weibull model enforces a monotonic hazard function and as the hazard function in the melanoma data has a turning point it is clearly inappropriate.
The lowest AIC is for the model with 5 df and for the BIC it is the model with 2 df. The penalty term in the AIC is twice the number of parameters (\(2 \times k\)) whereas in the BIC it is \(\ln(D) \times k\) where \(D\) is the number of events. Since \(\ln(D) > k\) the BIC penalizes extra parameters much more strongly than AIC. Since we have a large data set and there are no disadvantages to including extra parameters we would use 5df for the baseline hazard.

summary(fpma)
## Maximum likelihood estimation
## 
## Call:
## mle2(minuslogl = negll, start = coef, eval.only = TRUE, vecpar = TRUE, 
##     gr = function (beta) 
##     {
##         localargs <- args
##         localargs$init <- beta
##         localargs$return_type <- "gradient"
##         return(.Call("model_output", localargs, PACKAGE = "rstpm2"))
##     }, control = list(parscale = c(1, 1, 1, 1, 1, 1), maxit = 300), 
##     lower = -Inf, upper = Inf)
## 
## Coefficients:
##                         Estimate Std. Error  z value     Pr(z)    
## (Intercept)             -8.68011    0.58363 -14.8727 < 2.2e-16 ***
## year8594Diagnosed 85-94 -0.24334    0.06584  -3.6959 0.0002191 ***
## nsx(log(time), df = 4)1  6.58737    0.57688  11.4190 < 2.2e-16 ***
## nsx(log(time), df = 4)2  5.65891    0.38070  14.8645 < 2.2e-16 ***
## nsx(log(time), df = 4)3 11.41694    1.14341   9.9850 < 2.2e-16 ***
## nsx(log(time), df = 4)4  4.93069    0.24526  20.1040 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## -2 log L: 8440.488
aicc <- bicc <- beta <- se <- rep(NULL,6)
BIC <- function(object, nknots){
    -2 * as.numeric(bbmle::logLik(object)) + log(sum(melanoma0$event)) * (1 + nknots)
}

for (i in 1:6 ) {
  fitaic <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=i)
  aicc[i] <- AIC(fitaic)
  bicc[i] <- BIC(fitaic, i)
  beta[i] <- as.numeric(coef(fitaic)[2])
  se[i] <- as.numeric(sqrt(fitaic@vcov[2,2]))
}
rbind(beta=beta, se=se, aicc=aicc, bicc=bicc)
##               [,1]          [,2]          [,3]          [,4]          [,5]
## beta   -0.11575120   -0.23959859   -0.24389286   -0.24333629   -0.24554505
## se      0.06575706    0.06584092    0.06581083    0.06584011    0.06580441
## aicc 8676.81191687 8452.82834577 8452.43320118 8452.48836681 8447.41676243
## bicc 8684.54578344 8465.42914562 8469.90093431 8474.82303323 8474.61836214
##               [,6]
## beta   -0.24600362
## se      0.06580313
## aicc 8447.71022014
## bicc 8479.77875313

(e)

## Baseline survival
fitaic0 <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=6)
plot(fitaic0,newdata=data.frame(year8594=years[1]), lty=6, ci=F,
     xlab="Time since diagnosis (years)")

dfs <- c("s_df1","s_df2","s_df3","s_df4","s_df5","s_df6")
for (i in 1:5 ) {
  fitaic <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=i)
  plot(fitaic,newdata=data.frame(year8594=years[1]), add=TRUE,lty=i,
       xlab="Time since diagnosis (years)")
}
legend("topright", legend=dfs[1:6], lty=1:6)

## Baseline hazard
fitaic1 <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=6)
plot(fitaic1,newdata=data.frame(year8594=years[1]), lty=6, type="haz",
     ci=F, xlab="Time since diagnosis (years)", ylab="Hazard")

for (i in 1:5 ) {
  fitaic <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=i)
  plot(fitaic,newdata=data.frame(year8594=years[1]), add=TRUE, lty=i,
       type="haz", xlab="Time since diagnosis (years)", ylab="Hazard")
}
dfs2 <- c("h_df1","h_df2","h_df3","h_df4","h_df5","h_df6")
legend("topright", legend=dfs2[1:6], lty=1:6)

With the exception of 1 df (the Weibull model), the survival and hazard functions show similar shapes, so as long we have enough knots our conclusions would be very similar.

(f)

fpmf <- stpm2(Surv(time, event) ~ sex + year8594 + agegrp,
              data=melanoma0, df=4)

beta <- coef(fpmf)[2:6] ## log(HR)
se <- sqrt(diag(fpmf@vcov[2:6,2:6]))            # standard errors of beta
z <- beta/se                                    # z-scores
confin <- cbind(beta - 1.96*se, beta + 1.96*se) # 95% confidence interval of beta=log(HR)
p_value <- 1-pchisq((beta/se)^2, 1)             # Wald-type test of beta

## Summaries of Beta and HR
cbind(beta = beta, se = se, z = beta/se, p_value = p_value) # summaries of beta
##                               beta         se         z      p_value
## sexFemale               -0.5315676 0.06545328 -8.121329 4.440892e-16
## year8594Diagnosed 85-94 -0.3240732 0.06623449 -4.892816 9.940341e-07
## agegrp45-59              0.2835336 0.09417351  3.010758 2.605968e-03
## agegrp60-74              0.6216986 0.09087778  6.841041 7.861933e-12
## agegrp75+                1.2238357 0.10445135 11.716802 0.000000e+00
cbind(HR = exp(beta), lower.95 = exp(confin)[,1], upper.95 =  exp(confin)[,2])
##                                HR  lower.95  upper.95
## sexFemale               0.5876830 0.5169257 0.6681255
## year8594Diagnosed 85-94 0.7231973 0.6351508 0.8234491
## agegrp45-59             1.3278135 1.1040150 1.5969790
## agegrp60-74             1.8620883 1.5582730 2.2251382
## agegrp75+               3.4002050 2.7707307 4.1726877
## To test the joint significance of categorized age with Wald-type test
## Joint H0:
##          beta_(agegrp45-59) = 0
##          beta_(agegrp60-74) = 0
##          beta_(agegrp75+) = 0
statistic <- t(coef(fpmf)[4:6]) %*% solve(vcov(fpmf)[4:6,4:6]) %*% coef(fpmf)[4:6]
p_value <- 1 - pchisq(statistic, 3)
cbind(chi2_statistics =statistic, P_value = p_value)
##          [,1] [,2]
## [1,] 155.7851    0
## To test the overall effect of age with LR test
fpmf2 <- stpm2(Surv(time,event) ~ sex + year8594 , data=melanoma0, df=4)
anova(fpmf, fpmf2)
## Likelihood Ratio Tests
## Model 1: fpmf, [negll]: (Intercept)+sexFemale+year8594Diagnosed 85-94+
##           agegrp45-59+agegrp60-74+agegrp75++nsx(log(time), df = 4)1+
##           nsx(log(time), df = 4)2+nsx(log(time), df = 4)3+nsx(log(time),
##           df = 4)4
## Model 2: fpmf2, [negll]: (Intercept)+sexFemale+year8594Diagnosed 85-94+
##           nsx(log(time), df = 4)1+nsx(log(time), df = 4)2+nsx(log(time),
##           df = 4)3+nsx(log(time), df = 4)4
##   Tot Df Deviance  Chisq Df Pr(>Chisq)    
## 1     10   8241.3                         
## 2      7   8385.9 144.56  3  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The estimates are similar to those obtained from the Cox model. The Wald test yields a very highly significant result, which is similar to that obtained from the comparable test for the Cox model.

(g)

The estimates are so similar because very similar models are being fitted with exactly the same covariates. The two models differ only in the manner in which they account for the baseline hazard. In the Cox model it is assumed arbitrary and not directly estimated. In the flexible parametric model the baseline hazard is modelled using splines. The 5 df spline allows sufficient flexibility to make the model estimates virtually identical.

(h)

## (h) Change to time-varying effect of agegrp2:4
## NB: including main effect of agerp
fpmh <- stpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4,
              data=melanoma0, tvc=list(agegrp2 = 2, agegrp3 = 2, agegrp4 = 2),
              smooth.formula=~ nsx(log(time),df=4)  )
## NB: no main effect of agegrp
fpmh1 <- stpm2(Surv(time,event) ~ sex + year8594 ,
               data=melanoma0, tvc=list(agegrp2 = 2, agegrp3 = 2, agegrp4 = 2),
               smooth.formula=~ nsx(log(time),df=4)  )

beta <- coef(fpmh)[2:6] ## log(HR)
se <- sqrt(diag(fpmh@vcov[2:6,2:6])) ## standard errors of beta
z <- beta/se ## z-scores
confin <- cbind(beta - 1.96*se, beta + 1.96*se) ## 95% confidence interval of beta=log(HR)
p_value <- 1-pchisq((beta/se)^2, 1) ## Wald-type test of beta

## Summaries of Beta and HR
cbind(beta = beta, se = se, z = beta/se, p_value = p_value) ## summaries of beta
##                               beta         se         z      p_value
## sexFemale               -0.5257152 0.06542329 -8.035596 8.881784e-16
## year8594Diagnosed 85-94 -0.3282799 0.06631778 -4.950105 7.417362e-07
## agegrp2                  1.8952278 1.45204507  1.305213 1.918204e-01
## agegrp3                  1.6720363 1.40052457  1.193864 2.325311e-01
## agegrp4                  5.2026583 1.35762597  3.832174 1.270161e-04
cbind(HR = exp(beta), lower.95 = exp(confin)[,1], upper.95 =  exp(confin)[,2])
##                                  HR   lower.95     upper.95
## sexFemale                 0.5911324  0.5199904    0.6720077
## year8594Diagnosed 85-94   0.7201614  0.6323813    0.8201262
## agegrp2                   6.6540640  0.3864393  114.5757449
## agegrp3                   5.3229959  0.3419838   82.8527094
## agegrp4                 181.7547617 12.7013896 2600.8802455
## LR test comparing fpmh (non-PH for agegrp2:4) with fpmf(PH for agegrp2:4)
anova(fpmh, fpmf)
## Likelihood Ratio Tests
## Model 1: fpmh, [negll]: (Intercept)+sexFemale+year8594Diagnosed 85-94+
##           agegrp2+agegrp3+agegrp4+nsx(log(time), df = 4)1+
##           nsx(log(time), df = 4)2+nsx(log(time), df = 4)3+nsx(log(time),
##           df = 4)4+agegrp2:nsx(log(time), df = 2)1+
##           agegrp2:nsx(log(time), df = 2)2+agegrp3:nsx(log(time), df =
##           2)1+agegrp3:nsx(log(time), df = 2)2+agegrp4:nsx(log(time), df
##           = 2)1+agegrp4:nsx(log(time), df = 2)2
## Model 2: fpmf, [negll]: (Intercept)+sexFemale+year8594Diagnosed 85-94+
##           agegrp45-59+agegrp60-74+agegrp75++nsx(log(time), df = 4)1+
##           nsx(log(time), df = 4)2+nsx(log(time), df = 4)3+nsx(log(time),
##           df = 4)4
##   Tot Df Deviance  Chisq Df Pr(>Chisq)    
## 1     16   8212.1                         
## 2     10   8241.3 29.271  6  5.406e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(fpmh1, fpmf)
## Likelihood Ratio Tests
## Model 1: fpmh1, [negll]: (Intercept)+sexFemale+year8594Diagnosed 85-94+
##           nsx(log(time), df = 4)1+nsx(log(time), df = 4)2+nsx(log(time),
##           df = 4)3+nsx(log(time), df = 4)4+agegrp2:nsx(log(time), df =
##           2)1+agegrp2:nsx(log(time), df = 2)2+nsx(log(time), df =
##           2)1:agegrp3+nsx(log(time), df = 2)2:agegrp3+nsx(log(time), df
##           = 2)1:agegrp4+nsx(log(time), df = 2)2:agegrp4
## Model 2: fpmf, [negll]: (Intercept)+sexFemale+year8594Diagnosed 85-94+
##           agegrp45-59+agegrp60-74+agegrp75++nsx(log(time), df = 4)1+
##           nsx(log(time), df = 4)2+nsx(log(time), df = 4)3+nsx(log(time),
##           df = 4)4
##   Tot Df Deviance  Chisq Df Pr(>Chisq)
## 1     13   8237.3                     
## 2     10   8241.3 4.0156  3     0.2598
## I investigated the non-proportional effect of age with penalized models
## here, sp is the optimal smoothing parameters estimated from models without sp argument
pfit0 <- pstpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4,
                smooth.formula=~s(log(time)), data=melanoma0, sp=0.1359685)

## The time-dependent effects including linear forms of age groups
pfit1 <- pstpm2(Surv(time,event) ~ sex + year8594,
                smooth.formula=~s(log(time)) + s(log(time),by=agegrp2) +
                                s(log(time),by=agegrp3) + s(log(time),by=agegrp4),
                data=melanoma0, sp=c( 0.1429949, 1.6133966, 1.3183117, 1.9958815))
anova(pfit1, pfit0)## the results also suggest there is strong evidence
## Likelihood Ratio Tests
## Model 1: pfit1, [negll]: (Intercept)+sexFemale+year8594Diagnosed 85-94+
##           s(log(time)).1+s(log(time)).2+s(log(time)).3+s(log(time)).4+
##           s(log(time)).5+s(log(time)).6+s(log(time)).7+s(log(time)).8+
##           s(log(time)).9+s(log(time)):agegrp2.1+s(log(time)):agegrp2.2+
##           s(log(time)):agegrp2.3+s(log(time)):agegrp2.4+
##           s(log(time)):agegrp2.5+s(log(time)):agegrp2.6+
##           s(log(time)):agegrp2.7+s(log(time)):agegrp2.8+
##           s(log(time)):agegrp2.9+s(log(time)):agegrp2.10+
##           s(log(time)):agegrp3.1+s(log(time)):agegrp3.2+
##           s(log(time)):agegrp3.3+s(log(time)):agegrp3.4+
##           s(log(time)):agegrp3.5+s(log(time)):agegrp3.6+
##           s(log(time)):agegrp3.7+s(log(time)):agegrp3.8+
##           s(log(time)):agegrp3.9+s(log(time)):agegrp3.10+
##           s(log(time)):agegrp4.1+s(log(time)):agegrp4.2+
##           s(log(time)):agegrp4.3+s(log(time)):agegrp4.4+
##           s(log(time)):agegrp4.5+s(log(time)):agegrp4.6+
##           s(log(time)):agegrp4.7+s(log(time)):agegrp4.8+
##           s(log(time)):agegrp4.9+s(log(time)):agegrp4.10
## Model 2: pfit0, [negll]: (Intercept)+sexFemale+year8594Diagnosed 85-94+
##           agegrp2+agegrp3+agegrp4+s(log(time)).1+s(log(time)).2+
##           s(log(time)).3+s(log(time)).4+s(log(time)).5+s(log(time)).6+
##           s(log(time)).7+s(log(time)).8+s(log(time)).9
##   Tot Df Deviance  Chisq     Df Pr(>Chisq)   
## 1 23.878   8203.8                            
## 2 14.124   8230.9 27.064 9.7533   0.002193 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

There is strong evidence of a non-proportional effect of age.

(i)

The baseline hazard is shown below. This baseline is for the youngest age group who are male and diagnosed in 1975–1984, i.e, when all the covariates are equal to zero.

## (i) Plot of baseline hazard with fpmh
sexs <- levels(melanoma$sex)
years <- levels(melanoma$year8594)
agegrps <- levels(melanoma$agegrp)
par(mfrow=c(1,1))
newdata1 <- data.frame(sex=sexs[1],year8594=years[1],agegrp2=0, agegrp3=0, agegrp4=0)
plot(fpmh,newdata=newdata1, xlab="Time since diagnosis (years)", ylab="Hazard",
     type="haz", ci=FALSE, ylim=c(0,0.051), rug=FALSE)
legend("topright", legend=c(sexs[1], years[1], agegrps[1]), lty=1, bty="n")

(j)

plot(fpmh, newdata=data.frame(sex=sexs[1],year8594=years[1],agegrp2=0, agegrp3=0, agegrp4=0),
     type="hr", rug=FALSE, line.col=1, ci=FALSE,log="y", ylim=c(1,500), lty=1,
     exposed=function(data) transform(data,sex=sexs[1],year8594=years[1],agegrp2=1, agegrp3=0, agegrp4=0),
     xlab="Time since diagnosis (years)",ylab="Hazards ratio")
plot(fpmh, newdata=data.frame(sex=sexs[1],year8594=years[1],agegrp2=0, agegrp3=0, agegrp4=0),
     type="hr", rug=FALSE, line.col=1, ci=FALSE, add=T,log="y", lty=2,
     exposed=function(data) transform(data,sex=sexs[1],year8594=years[1],agegrp2=0, agegrp3=1, agegrp4=0),
     xlab="Time since diagnosis (years)",ylab="Hazards ratio")
plot(fpmh, newdata=data.frame(sex=sexs[1],year8594=years[1],agegrp2=0, agegrp3=0, agegrp4=0),
     type="hr", rug=FALSE, line.col=1, ci=FALSE,add=T,log="y", lty=3,
     exposed=function(data) transform(data,sex=sexs[1],year8594=years[1],agegrp2=0, agegrp3=0, agegrp4=1),
     xlab="Time since diagnosis (years)",ylab="Hazards ratio")
legend("topright", legend=agegrps[2:4], lty=1:3)

plot(fpmh, newdata=data.frame(sex=sexs[1],year8594=years[1],agegrp2=0, agegrp3=0, agegrp4=0),
     type="hr", rug=FALSE, line.col=1, ci=T, log="y",
     exposed=function(data) transform(data,sex=sexs[1],year8594=years[1],agegrp2=0, agegrp3=0, agegrp4=1),
     xlab="Time since diagnosis (years)",ylab="Hazards ratio")
## Warning in xy.coords(x, y, xlabel, ylabel, log = log): 9 y values <= 0
## omitted from logarithmic plot

    The hazard ratios decrease as a function of follow-up time. The
    hazard ratio is so high during the early years of follow-up
    because the hazard in the reference group is close to
    zero. The hazard ratio for the oldest age
    group with 95% confidence intervals is also shown.

(k)

plot(fpmh,newdata=data.frame(sex=sexs[1],year8594=years[1],agegrp2=0, agegrp3=0, agegrp4=0),
     type="hdiff", rug=FALSE, line.col=1, ci=T,
     exposed=function(data) transform(data,sex=sexs[1],year8594=years[1],agegrp2=0, agegrp3=0, agegrp4=1),
     xlab="Time since diagnosis (years)",ylab="Hazards difference")

The hazard difference is small early on, despite the hazard ratio being large, because the underlying hazard is so low.

(l)

plot(fpmh,newdata=data.frame(sex=sexs[1],year8594=years[1],agegrp2=0, agegrp3=0, agegrp4=0),
     type="sdiff", rug=FALSE, line.col=1, ci=T,
     exposed=function(data) transform(data, sex=sexs[1],year8594=years[1],agegrp2=0, agegrp3=0, agegrp4=1),
     xlab="Time since diagnosis (years)",ylab="Survivals difference")

(m)

aicc <- bicc <- beta <- se <- rep(1,3)
nevent <- sum(melanoma0$event)
BIC <- function(object, nknots){
  -2 * as.numeric(bbmle::logLik(object)) + log(nevent) * (1 + nknots)
}
for (i in 1:3 ) {
  fitdf <- stpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4,
                  data=melanoma0, tvc=list(agegrp2 = i, agegrp3 = i, agegrp4 = i),
                  smooth.formula=~ nsx(log(time),df=4)  )
  aicc[i] <- AIC(fitaic)
  bicc[i] <- BIC(fitaic, i)
}
cbind(aicc=aicc, bicc=bicc)
##          aicc     bicc
## [1,] 8447.417 8447.151
## [2,] 8447.417 8454.018
## [3,] 8447.417 8460.884
## PLots with different df
fitdf1 <- stpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4, data=melanoma0,
                tvc = list(agegrp2 = 1, agegrp3 = 1, agegrp4 = 1), smooth.formula=~ nsx(log(time),df=4))

plot(fitdf1, newdata = data.frame(sex=sexs[1], year8594=years[1], agegrp2=0, agegrp3=0, agegrp4=0),
     type="hr", rug=FALSE, line.col=1, ci=F, log="y", lty=1,
     exposed=function(data) transform(data, sex = sexs[1],year8594=years[1],
                                        agegrp2=0, agegrp3 = 0, agegrp4=1),
     xlab="Time since diagnosis (years)", ylab="Hazards ratio")

for (i in 2:3 ) {
    fitdf <- stpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4,
                   data=melanoma0, tvc=list(agegrp2 = i, agegrp3 = i, agegrp4 = i),
                   smooth.formula=~ nsx(log(time),df=4))

    plot(fitdf, newdata=data.frame(sex=sexs[1],year8594=years[1],agegrp2=0, agegrp3=0, agegrp4=0),
         type="hr", rug=FALSE, line.col=1, ci=F, log="y", add=T, lty=i,
         exposed=function(data) transform(data, sex=sexs[1],year8594=years[1],agegrp2=0, agegrp3=0, agegrp4=1),
         xlab="Time since diagnosis (years)",ylab="Hazards ratio")
}
legend("topright", legend=c("1 df", "2 df", "3 df"), lty=1:3)