This document provides a brief tutorial to analyzing twin data using the mets
package:
\(\newcommand{\cov}{\mathbb{C}\text{ov}} \newcommand{\cor}{\mathbb{C}\text{or}} \newcommand{\var}{\mathbb{V}\text{ar}} \newcommand{\E}{\mathbb{E}} \newcommand{\unitfrac}[2]{#1/#2} \newcommand{\n}{}\)
The development version may be installed from github:
# install.packages("remotes")
::install_github("kkholst/mets", dependencies="Suggests") remotes
In the following we examine the heritability of Body Mass Indexkorkeila_bmi_1991 hjelmborg_bmi_2008, based on data on self-reported BMI-values from a random sample of 11,411 same-sex twins. First, we will load data
library(mets)
data("twinbmi")
head(twinbmi)
#> tvparnr bmi age gender zyg id num
#> 1 1 26.33289 57.51212 male DZ 1 1
#> 2 1 25.46939 57.51212 male DZ 1 2
#> 3 2 28.65014 56.62696 male MZ 2 1
#> 5 3 28.40909 57.73097 male DZ 3 1
#> 7 4 27.25089 53.68683 male DZ 4 1
#> 8 4 28.07504 53.68683 male DZ 4 2
The data is on long format with one subject per row.
tvparnr
: twin idbmi
: Body Mass Index (\(\mathrm{kg}/{\mathrm{m}^2}\))age
: Age (years)gender
: Gender factor (male,female)zyg
: zygosity (MZ, DZ)We transpose the data allowing us to do pairwise analyses
<- fast.reshape(twinbmi, id="tvparnr",varying=c("bmi"))
twinwide head(twinwide)
#> tvparnr bmi1 age gender zyg id num bmi2
#> 1 1 26.33289 57.51212 male DZ 1 1 25.46939
#> 3 2 28.65014 56.62696 male MZ 2 1 NA
#> 5 3 28.40909 57.73097 male DZ 3 1 NA
#> 7 4 27.25089 53.68683 male DZ 4 1 28.07504
#> 9 5 27.77778 52.55838 male DZ 5 1 NA
#> 11 6 28.04282 52.52231 male DZ 6 1 22.30936
Next we plot the association within each zygosity group
We here show the log-transformed data which is slightly more symmetric and more appropiate for the twin analysis (see Figure @ref(fig:scatter1) and @ref(fig:scatter2))
<- log(subset(twinwide, zyg=="MZ")[,c("bmi1","bmi2")])
mz scatterdens(mz)
Scatter plot of logarithmic BMI measurements in MZ twins
<- log(subset(twinwide, zyg=="DZ")[,c("bmi1","bmi2")])
dz scatterdens(dz)
Scatter plot of logarithmic BMI measurements in DZ twins
The plots and raw association measures shows considerable stronger dependence in the MZ twins, thus indicating genetic influence of the trait
cor.test(mz[,1],mz[,2], method="spearman")
#>
#> Spearman's rank correlation rho
#>
#> data: mz[, 1] and mz[, 2]
#> S = 165457624, p-value < 2.2e-16
#> alternative hypothesis: true rho is not equal to 0
#> sample estimates:
#> rho
#> 0.6956209
cor.test(dz[,1],dz[,2], method="spearman")
#>
#> Spearman's rank correlation rho
#>
#> data: dz[, 1] and dz[, 2]
#> S = 2162514570, p-value < 2.2e-16
#> alternative hypothesis: true rho is not equal to 0
#> sample estimates:
#> rho
#> 0.4012686
Ńext we examine the marginal distribution (GEE model with working independence)
<- lm(bmi ~ gender + I(age-40), data=twinbmi)
l0 estimate(l0, id=twinbmi$tvparnr)
#> Estimate Std.Err 2.5% 97.5% P-value
#> (Intercept) 23.3687 0.054534 23.2618 23.4756 0.000e+00
#> gendermale 1.4077 0.073216 1.2642 1.5512 2.230e-82
#> I(age - 40) 0.1177 0.004787 0.1083 0.1271 1.499e-133
library("splines")
<- lm(bmi ~ gender*ns(age,3), data=twinbmi)
l1 <- estimate(l1, id=twinbmi$tvparnr) marg1
<- Expand(twinbmi,
dm bmi=0,
gender=c("male"),
age=seq(33,61,length.out=50))
<- Expand(twinbmi,
df bmi=0,
gender=c("female"),
age=seq(33,61,length.out=50))
plot(marg1, function(p) model.matrix(l1,data=dm)%*%p,
data=dm["age"], ylab="BMI", xlab="Age",
ylim=c(22,26.5))
plot(marg1, function(p) model.matrix(l1,data=df)%*%p,
data=df["age"], col="red", add=TRUE)
legend("bottomright", c("Male","Female"),
col=c("black","red"), lty=1, bty="n")
Marginal association between BMI and Age for males and females.
We can decompose the trait into the following variance components
\[\begin{align*} Y_i = A_i + D_i + C + E_i, \quad i=1,2 \end{align*}\]
Dissimilarity of MZ twins arises from unshared environmental effects only, \(\cor(E_1,E_2)=0\) and
\[\begin{align*} \cor(A_1^{MZ},A_2^{MZ}) = 1, \quad \cor(D_1^{MZ},D_2^{MZ}) = 1, \end{align*}\]
\[\begin{align*} \cor(A_1^{DZ},A_2^{DZ}) = 0.5, \quad \cor(D_1^{DZ},D_2^{DZ}) = 0.25, \end{align*}\]
\[\begin{align*} Y_i = A_i + C_i + D_i + E_i \end{align*}\]
\[\begin{align*} A_i \sim\mathcal{N}(0,\sigma_A^2), C_i \sim\mathcal{N}(0,\sigma_C^2), D_i \sim\mathcal{N}(0,\sigma_D^2), E_i \sim\mathcal{N}(0,\sigma_E^2) \end{align*}\]
\[\begin{gather*} \cov(Y_{1},Y_{2}) = \\ \begin{pmatrix} \sigma_A^2 & 2\Phi\sigma_A^2 \\ 2\Phi\sigma_A^2 & \sigma_A^2 \end{pmatrix} + \begin{pmatrix} \sigma_C^2 & \sigma_C^2 \\ \sigma_C^2 & \sigma_C^2 \end{pmatrix} + \begin{pmatrix} \sigma_D^2 & \Delta_{7}\sigma_D^2 \\ \Delta_{7}\sigma_D^2 & \sigma_D^2 \end{pmatrix} + \begin{pmatrix} \sigma_E^2 & 0 \\ 0 & \sigma_E^2 \end{pmatrix} \end{gather*}\]
<- na.omit(twinbmi)
dd <- twinlm(bmi ~ age+gender, data=dd, DZ="DZ", zyg="zyg", id="tvparnr", type="sat")
l0
# different marginals (but within pair)
<- twinlm(bmi ~ age+gender, data=dd,DZ="DZ", zyg="zyg", id="tvparnr", type="flex")
lf
# same marginals but free correlation with MZ, DZ
<- twinlm(bmi ~ age+gender, data=dd, DZ="DZ", zyg="zyg", id="tvparnr", type="u")
lu estimate(lu,contr(5:6,6))
#> Estimate Std.Err 2.5% 97.5% P-value
#> [atanh(rhoMZ)@1] - [a.... 0.4816 0.04177 0.3997 0.5635 9.431e-31
#>
#> Null Hypothesis:
#> [atanh(rhoMZ)@1] - [atanh(rhoDZ)@2] = 0
estimate(lu)
#> Estimate Std.Err 2.5% 97.5% P-value
#> bmi.1@1 18.6037 0.251036 18.1116 19.0957 0.000e+00
#> bmi.1~age.1@1 0.1189 0.005635 0.1078 0.1299 9.177e-99
#> bmi.1~gendermale.1@1 1.3848 0.086573 1.2151 1.5544 1.376e-57
#> log(var)@1 2.4424 0.022095 2.3991 2.4857 0.000e+00
#> atanh(rhoMZ)@1 0.7803 0.036249 0.7092 0.8513 9.008e-103
#> atanh(rhoDZ)@2 0.2987 0.020953 0.2576 0.3397 4.288e-46
<- twinlm(bmi ~ zyg, data=dd, DZ="DZ", zyg="zyg", id="tvparnr", type="flex")
lf coef(lf)
#> bmi.1@1 bmi.1@2 bmi.1~zygMZ.1@1 log(var(MZ))@1 atanh(rhoMZ)@1
#> 24.5832960 24.6575702 -0.3968696 2.5289387 0.8362321
#> bmi.1~zygMZ.1@2 log(var(DZ))@2 atanh(rhoDZ)@2
#> -0.3968689 2.5659593 0.3860756
###sink("lu-est-summary.txt")
<- twinlm(bmi ~ zyg, data=dd, DZ="DZ", zyg="zyg", id="tvparnr", type="u")
lu summary(lu)
#> ____________________________________________________
#> Group 1: MZ (n=1483)
#> Estimate Std. Error Z value Pr(>|z|)
#> Regressions:
#> bmi.1~zygMZ.1 -0.47114 0.10242 -4.60001 4.225e-06
#> Intercepts:
#> bmi.1 24.65757 0.05614 439.18694 <1e-12
#> Additional Parameters:
#> log(var) 2.55537 0.01699 150.37316 <1e-12
#> atanh(rhoMZ) 0.84865 0.02296 36.96325 <1e-12
#> ____________________________________________________
#> Group 2: DZ (n=2788)
#> Estimate Std. Error Z value Pr(>|z|)
#> Regressions:
#> bmi.1~zygMZ.1 -0.47114 0.10242 -4.60001 4.225e-06
#> Intercepts:
#> bmi.1 24.65757 0.05614 439.18694 <1e-12
#> Additional Parameters:
#> log(var) 2.55537 0.01699 150.37316 <1e-12
#> atanh(rhoDZ) 0.38268 0.01847 20.71970 <1e-12
#>
#> Estimate 2.5% 97.5%
#> Correlation within MZ: 0.69036 0.66607 0.71319
#> Correlation within DZ: 0.36503 0.33325 0.39598
#>
#> 'log Lik.' -22355.15 (df=5)
#> AIC: 44720.3
#> BIC: 44752.1
estimate(lu)
#> Estimate Std.Err 2.5% 97.5% P-value
#> bmi.1@1 24.6576 0.05650 24.5468 24.7683 0.000e+00
#> bmi.1~zygMZ.1@1 -0.4711 0.10155 -0.6702 -0.2721 3.489e-06
#> log(var)@1 2.5554 0.02019 2.5158 2.5949 0.000e+00
#> atanh(rhoMZ)@1 0.8486 0.03554 0.7790 0.9183 5.146e-126
#> atanh(rhoDZ)@2 0.3827 0.02095 0.3416 0.4237 1.545e-74
crossprod(iid(lu))^.5
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 0.05650271 NaN 0.019575810 0.01346928 NaN
#> [2,] NaN 0.10154621 0.004344490 NaN 0.015803415
#> [3,] 0.01957581 0.00434449 0.020190842 0.01053796 0.006669272
#> [4,] 0.01346928 NaN 0.010537960 0.03554047 NaN
#> [5,] NaN 0.01580342 0.006669272 NaN 0.020950321
###sink()
vcov(lu)
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 3.152113e-03 -3.152113e-03 8.675199e-09 4.107103e-09 7.707502e-09
#> [2,] -3.152113e-03 1.049033e-02 -3.973997e-10 3.391234e-09 -5.078878e-09
#> [3,] 8.675199e-09 -3.973997e-10 2.887794e-04 1.367167e-04 9.170282e-05
#> [4,] 4.107103e-09 3.391234e-09 1.367167e-04 5.271249e-04 4.341482e-05
#> [5,] 7.707502e-09 -5.078878e-09 9.170282e-05 4.341482e-05 3.411139e-04
#> attr(,"det")
#> [1] 1.037717e+15
#> attr(,"pseudo")
#> [1] FALSE
#> attr(,"minSV")
#> [1] 85.77516
estimate(lu)
#> Estimate Std.Err 2.5% 97.5% P-value
#> bmi.1@1 24.6576 0.05650 24.5468 24.7683 0.000e+00
#> bmi.1~zygMZ.1@1 -0.4711 0.10155 -0.6702 -0.2721 3.489e-06
#> log(var)@1 2.5554 0.02019 2.5158 2.5949 0.000e+00
#> atanh(rhoMZ)@1 0.8486 0.03554 0.7790 0.9183 5.146e-126
#> atanh(rhoDZ)@2 0.3827 0.02095 0.3416 0.4237 1.545e-74
dim(iid(lu))
#> [1] 4271 5
estimate(lu,contr(4:5,5))
#> Estimate Std.Err 2.5% 97.5% P-value
#> [atanh(rhoMZ)@1] - [a.... 0.466 0.04138 0.3849 0.5471 2.026e-29
#>
#> Null Hypothesis:
#> [atanh(rhoMZ)@1] - [atanh(rhoDZ)@2] = 0
estimate(coef=coef(lu),vcov=vcov(lu),contr(4:5,5))
#> Estimate Std.Err 2.5% 97.5% P-value
#> bmi.1@1 24.6576 0.05614 24.5475 24.7676 0.000e+00
#> bmi.1~zygMZ.1@1 -0.4711 0.10242 -0.6719 -0.2704 4.225e-06
#> log(var)@1 2.5554 0.01699 2.5221 2.5887 0.000e+00
#> atanh(rhoMZ)@1 0.8486 0.02296 0.8036 0.8936 4.462e-299
#> atanh(rhoDZ)@2 0.3827 0.01847 0.3465 0.4189 2.301e-95
wald.test(coef=coef(lu),vcov=vcov(lu),contrast=c(0,0,0,1,-1))
#> lin.comb se lower upper pval
#> B 0.465969 0.0279537 0.4111807 0.5207572 0
#>
#> Wald test
#>
#> data:
#> chisq = 277.87, df = 1, p-value < 2.2e-16
<- twinlm(bmi ~ ns(age,1)+gender, data=twinbmi,
l DZ="DZ", zyg="zyg", id="tvparnr", type="cor", missing=TRUE)
summary(l)
#> ____________________________________________________
#> Group 1
#> Estimate Std. Error Z value Pr(>|z|)
#> Regressions:
#> bmi.1~ns(age, 1).1 4.16937 0.16669 25.01334 <1e-12
#> bmi.1~gendermale.1 1.41160 0.07284 19.37839 <1e-12
#> Intercepts:
#> bmi.1 22.53618 0.07296 308.87100 <1e-12
#> Additional Parameters:
#> log(var) 2.44580 0.01425 171.68256 <1e-12
#> atanh(rhoMZ) 0.78217 0.02290 34.16186 <1e-12
#> ____________________________________________________
#> Group 2
#> Estimate Std. Error Z value Pr(>|z|)
#> Regressions:
#> bmi.1~ns(age, 1).1 4.16937 0.16669 25.01334 <1e-12
#> bmi.1~gendermale.1 1.41160 0.07284 19.37839 <1e-12
#> Intercepts:
#> bmi.1 22.53618 0.07296 308.87100 <1e-12
#> Additional Parameters:
#> log(var) 2.44580 0.01425 171.68256 <1e-12
#> atanh(rhoDZ) 0.29924 0.01848 16.19580 <1e-12
#>
#> Estimate 2.5% 97.5%
#> Correlation within MZ: 0.65395 0.62751 0.67889
#> Correlation within DZ: 0.29061 0.25712 0.32341
#>
#> 'log Lik.' -29020.12 (df=6)
#> AIC: 58052.24
#> BIC: 58093.29
A formal test of genetic effects can be obtained by comparing the MZ and DZ correlation:
estimate(l,contr(5:6,6))
#> Estimate Std.Err 2.5% 97.5% P-value
#> [atanh(rhoMZ)@1] - [a.... 0.4829 0.04176 0.4011 0.5648 6.325e-31
#>
#> Null Hypothesis:
#> [atanh(rhoMZ)@1] - [atanh(rhoDZ)@3] = 0
<- twinlm(bmi ~ ns(age,1)+gender, data=twinbmi,
l DZ="DZ", zyg="zyg", id="tvparnr", type="cor", missing=TRUE)
summary(l)
#> ____________________________________________________
#> Group 1
#> Estimate Std. Error Z value Pr(>|z|)
#> Regressions:
#> bmi.1~ns(age, 1).1 4.16937 0.16669 25.01334 <1e-12
#> bmi.1~gendermale.1 1.41160 0.07284 19.37839 <1e-12
#> Intercepts:
#> bmi.1 22.53618 0.07296 308.87100 <1e-12
#> Additional Parameters:
#> log(var) 2.44580 0.01425 171.68256 <1e-12
#> atanh(rhoMZ) 0.78217 0.02290 34.16186 <1e-12
#> ____________________________________________________
#> Group 2
#> Estimate Std. Error Z value Pr(>|z|)
#> Regressions:
#> bmi.1~ns(age, 1).1 4.16937 0.16669 25.01334 <1e-12
#> bmi.1~gendermale.1 1.41160 0.07284 19.37839 <1e-12
#> Intercepts:
#> bmi.1 22.53618 0.07296 308.87100 <1e-12
#> Additional Parameters:
#> log(var) 2.44580 0.01425 171.68256 <1e-12
#> atanh(rhoDZ) 0.29924 0.01848 16.19580 <1e-12
#>
#> Estimate 2.5% 97.5%
#> Correlation within MZ: 0.65395 0.62751 0.67889
#> Correlation within DZ: 0.29061 0.25712 0.32341
#>
#> 'log Lik.' -29020.12 (df=6)
#> AIC: 58052.24
#> BIC: 58093.29
We also consider the ACE model
<- twinlm(bmi ~ age+gender, data=dd, DZ="DZ", zyg="zyg", id="tvparnr", type="ace") ace0
[korkeila_bmi_1991] Korkeila, Kaprio, Rissanen & Koskenvuo, Effects of gender and age on the heritability of body mass index, Int J Obes, 15(10), 647-654 (1991). ↩︎
[hjelmborg_bmi_2008] Hjelmborg, Fagnani, Silventoinen, McGue, Korkeila, Christensen, Rissanen & Kaprio, Genetic influences on growth traits of BMI: a longitudinal study of adult twins, Obesity (Silver Spring), 16(4), 847-852 (2008). ↩︎