Log-Likelihood Visualization for Archimedean Copulas

Auxiliary functions

We start by defining the following auxiliary functions.

##' @title [m]inus Log-Likelihood for Archimedean Copulas ("fast version")
##' @param theta parameter (length 1 for our current families)
##' @param acop Archimedean copula (of class "acopula")
##' @param u data matrix n x d
##' @param n.MC if > 0 MC is applied with sample size equal to n.MC; otherwise,
##'        the exact formula is used
##' @param ... potential further arguments, passed to <acop> @dacopula()
##' @return negative log-likelihood
##' @author Martin Maechler (Marius originally)
mLogL <- function(theta, acop, u, n.MC=0, ...) { # -(log-likelihood)
    -sum(acop@dacopula(u, theta, n.MC=n.MC, log=TRUE, ...))
}

##' @title Plotting the Negative Log-Likelihood for Archimedean Copulas
##' @param cop an outer_nacopula (currently with no children)
##' @param u n x d  data matrix
##' @param xlim x-range for curve() plotting
##' @param main title for curve()
##' @param XtrArgs a list of further arguments for mLogL()
##' @param ... further arguments for curve()
##' @return invisible()
##' @author Martin Maechler
curveLogL <- function(cop, u, xlim, main, XtrArgs=list(), ...) {
    unam <- deparse(substitute(u))
    stopifnot(is(cop, "outer_nacopula"), is.list(XtrArgs),
              (d <- ncol(u)) >= 2, d == dim(cop),
              length(cop@childCops) == 0# not yet *nested* A.copulas
              )
    acop <- cop@copula
    th. <- acop@theta # the true theta
    acop <- setTheta(acop, NA) # so it's clear, the true theta is not used below
    if(missing(main)) {
        tau. <- cop@copula@tau(th.)
        main <- substitute("Neg. Log Lik."~ -italic(l)(theta, UU) ~ TXT ~~
               FUN(theta['*'] == Th) %=>% tau['*'] == Tau,
               list(UU = unam,
                TXT= sprintf("; n=%d, d=%d;  A.cop",
                         nrow(u), d),
                FUN = acop@name,
                Th = format(th.,digits=3),
                Tau = format(tau., digits=3)))
    }
    r <- curve(do.call(Vectorize(mLogL, "theta"), c(list(x, acop, u), XtrArgs)),
               xlim=xlim, main=main,
               xlab = expression(theta),
               ylab = substitute(- log(L(theta, u, ~~ COP)), list(COP=acop@name)),
               ...)
    if(is.finite(th.))
        axis(1, at = th., labels=expression(theta["*"]),
             lwd=2, col="dark gray", tck = -1/30)
    else warning("non-finite cop@copula@theta = ", th.)
    axis(1, at = initOpt(acop@name),
         labels = FALSE, lwd = 2, col = 2, tck = 1/20)
    invisible(r)
}

Ensure that we are told about it, if the numerical algorithms choose methods using Rmpfr (R package interfacing to multi precision arithmetic MPFR):

op <- options("copula:verboseUsingRmpfr"=TRUE) # see when "Rmpfr" methods are chosen automatically

Joe’s family

Easy case (\(\tau=0.2\))

n <- 200
d <- 100
tau <- 0.2
theta <- copJoe@iTau(tau)
cop <- onacopulaL("Joe", list(theta,1:d))
theta

## [1] 1.443824

Here, the three different methods work “the same”:

set.seed(1)
U1 <- rnacopula(n,cop)
enacopula(U1, cop, "mle") # 1.432885 --  fine

## [1] 1.432898

th4 <- 1 + (1:4)/4
mL.tr <- c(-3558.5, -3734.4, -3299.5, -2505.)
mLt1 <- sapply(th4, function(th) mLogL(th, cop@copula, U1, method="log.poly")) # default
mLt2 <- sapply(th4, function(th) mLogL(th, cop@copula, U1, method="log1p"))
mLt3 <- sapply(th4, function(th) mLogL(th, cop@copula, U1, method="poly"))
stopifnot(all.equal(mLt1, mL.tr, tolerance=5e-5),
          all.equal(mLt2, mL.tr, tolerance=5e-5),
          all.equal(mLt3, mL.tr, tolerance=5e-5))
system.time(r1l  <- curveLogL(cop, U1, c(1, 2.5), X=list(method="log.poly")))

##    user  system elapsed 
##   0.415   0.002   0.374

mtext("all three polyJ() methods on top of each other")
system.time({
    r1J <- curveLogL(cop, U1, c(1, 2.5), X=list(method="poly"),
                     add=TRUE, col=adjustcolor("red", .4))
    r1m  <- curveLogL(cop, U1, c(1, 2.5), X=list(method="log1p"),
                      add=TRUE, col=adjustcolor("blue",.5))
})

##    user  system elapsed 
##   0.667   0.000   0.671

U2 <- rnacopula(n,cop)
summary(dCopula(U2, cop)) # => density for the *correct* parameter looks okay

##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
##  0.000e+00  4.900e+01  6.430e+02 2.777e+175  1.932e+04 5.553e+177

## hmm: max = 5.5e177
if(doExtras)
    system.time(r2 <- curveLogL(cop, U2, c(1, 2.5)))
stopifnot(all.equal(enacopula(U2, cop, "mle"), 1.43992755, tolerance=1e-5),
          all.equal(mLogL(1.8, cop@copula, U2), -4070.1953,tolerance=1e-5)) # (was -Inf)

U3 <- rnacopula(n,cop)
(th. <- enacopula(U3, cop, "mle")) # 1.4495

## [1] 1.449569

system.time(r3 <- curveLogL(cop, U3, c(1, 2.5)))

##    user  system elapsed 
##   0.326   0.000   0.331

axis(1, at = th., label = quote(hat(theta)))

U4 <- rnacopula(n,cop)
enacopula(U4, cop, "mle") # 1.4519  (prev. was 2.351 : "completely wrong")

## [1] 1.451916

summary(dCopula(U4, cop)) # ok (had one Inf)

##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
##  0.000e+00  7.500e+01  9.080e+02 1.981e+259  1.434e+04 3.961e+261

if(doExtras)
    system.time(r4 <- curveLogL(cop, U4, c(1, 2.5)))
mLogL(2.2351, cop@copula, U4)

## [1] -1789.59

mLogL(1.5,    cop@copula, U4)

## [1] -3882.819

mLogL(1.2,    cop@copula, U4)

## [1] -3517.366

if(doExtras) # each curve takes almost 2 sec
    system.time({
        curveLogL(cop, U4, c(1, 1.01))
        curveLogL(cop, U4, c(1, 1.0001))
        curveLogL(cop, U4, c(1, 1.000001))
    })
## --> limit goes *VERY* steeply up to  0
## --> theta 1.164 is about the boundary:
stopifnot(identical(setTheta(cop, 1.164), onacopula(cop@copula, C(1.164, 1:100))),
      all.equal(600.59577,
            cop@copula@dacopula(U4[118,,drop=FALSE],
                    theta=1.164, log = TRUE), tolerance=1e-5)) # was "Inf"

Harder case (\(d=150\), \(\tau=0.3\))

n <- 200
d <- 150
tau <- 0.3
(theta <- copJoe@iTau(tau))

## [1] 1.772108

cop <- onacopulaL("Joe",list(theta,1:d))

set.seed(47)
U. <- rnacopula(n,cop)
enacopula(U., cop, "mle") # 1.784578

## [1] 1.78459

system.time(r. <- curveLogL(cop, U., c(1.1, 3)))

##    user  system elapsed 
##   0.455   0.000   0.461

## => still looks very good

Even harder case (\(d=180\), \(\tau=0.4\))

d <- 180
tau <- 0.4
(theta <- copJoe@iTau(tau))

## [1] 2.219066

cop <- onacopulaL("Joe",list(theta,1:d))

U. <- rnacopula(n,cop)
enacopula(U., cop, "mle") # 2.217582

## [1] 2.217593

if(doExtras)
system.time(r. <- curveLogL(cop, U., c(1.1, 4)))
## => still looks very good

Gumbel’s family

Easy case (\(\tau=0.2\))

n <- 200
d <- 50 # smaller 'd' -- so as to not need 'Rmpfr' here
tau <- 0.2
(theta <- copGumbel@iTau(tau))

## [1] 1.25

cop <- onacopulaL("Gumbel",list(theta,1:d))

set.seed(1)
U1 <- rnacopula(n,cop)
if(doExtras) {
    U2 <- rnacopula(n,cop)
    U3 <- rnacopula(n,cop)
}
enacopula(U1, cop, "mle") # 1.227659 (was 1.241927)

## [1] 1.227659

##--> Plots with "many" likelihood evaluations
system.time(r1 <- curveLogL(cop, U1, c(1, 2.1)))

##    user  system elapsed 
##   0.447   0.000   0.449

if(doExtras) system.time({
    mtext("and two other generated samples")
    r2 <- curveLogL(cop, U2, c(1, 2.1), add=TRUE)
    r3 <- curveLogL(cop, U3, c(1, 2.1), add=TRUE)
})

Harder case (\(d=150\), \(\tau=0.6\))

d <- 150
tau <- 0.6
(theta <- copGumbel@iTau(tau))

## [1] 2.5

cG.5 <- onacopulaL("Gumbel",list(theta,1:d))

set.seed(17)
U4 <- rnacopula(n,cG.5)
U5 <- rnacopula(n,cG.5)
U6 <- rnacopula(n,cG.5)
if(doExtras) { ## "Rmpfr" is used {2012-06-21}: -- therefore about 18 seconds!
 tol <- if(interactive()) 1e-12 else 1e-8
 print(system.time(
 ee. <- c(enacopula(U4, cG.5, "mle", tol=tol),
          enacopula(U5, cG.5, "mle", tol=tol),
          enacopula(U6, cG.5, "mle", tol=tol))))
dput(ee.)# in case the following fails
## tol=1e-12 Linux nb-mm3 3.2.0-25-generic x86_64 (2012-06-23):
##   c(2.47567251789004, 2.48424484287686, 2.50410767129408)
##   c(2.475672518,      2.484244763,      2.504107671),
stopifnot(all.equal(ee., c(2.475672518, 2.484244763, 2.504107671),
            tolerance= max(1e-7, 16*tol)))
}
## --> Plots with "many" likelihood evaluations
th. <- seq(1, 3, by= 1/4)
if(doExtras) # "default2012" (polyG default) partly uses Rmpfr here:
system.time(r4   <- sapply(th., mLogL, acop=cG.5@copula, u=U4))## 25.6 sec
## whereas this (polyG method) is very fast {and still ok}:
system.time(r4.p <- sapply(th., mLogL, acop=cG.5@copula, u=U4, method="pois"))

##    user  system elapsed 
##   0.107   0.000   0.108

r4. <- c(0, -18375.33, -21948.033, -24294.995, -25775.502,
         -26562.609, -26772.767, -26490.809, -25781.224)
stopifnot(!doExtras ||
          all.equal(r4,   r4., tolerance = 8e-8),
          all.equal(r4.p, r4., tolerance = 8e-8))
## --> use fast method here as well:
system.time(r5.p <- sapply(th., mLogL, acop=cG.5@copula, u=U5, method="pois"))

##    user  system elapsed 
##   0.106   0.000   0.107

system.time(r6.p <- sapply(th., mLogL, acop=cG.5@copula, u=U6, method="pois"))

##    user  system elapsed 
##   0.107   0.000   0.107

if(doExtras) {
    if(FALSE) # for speed analysis, etc
        debug(copula:::polyG)
    mLogL(1.65, cG.5@copula, U4) # -23472.96
}
dd <- dCopula(U4, setTheta(cG.5, 1.64), log = TRUE,
              method = if(doExtras)"default" else "pois")
summary(dd)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   41.59   53.30   81.09  116.91  137.54  707.13

stopifnot(!is.na(dd), # no NaN's anymore
      40 < range(dd), range(dd) < 710)

Frank’s family (an already hard case)

n <- 64
d <- 5
tau <- 0.8
(theta <- copFrank@iTau(tau))

## [1] 18.19154

cop <- onacopulaL("Frank", list(theta,1:d))

set.seed(11) # these seeds give no problems: 101, 41, 21
U. <- rnacopula(n,cop)
cop@copula <- setTheta(cop@copula, NA) # forget the true theta
system.time(f.ML <- emle(U., cop)); f.ML # --> fine: theta = 18.033, Log-lik = 314.01

##    user  system elapsed 
##   0.014   0.000   0.015

## 
## Call:
## bbmle::mle2(minuslogl = nLL, start = start, optimizer = "optimize", 
##     lower = interval[1], upper = interval[2])
## 
## Coefficients:
##   theta 
## 18.0333 
## 
## Log-likelihood: 314.01

if(doExtras)
    system.time(f.mlMC <- emle(U., cop, n.MC = 1e4)) # with MC
stopifnot(all.equal(unname(coef(f.ML)), 18.03331, tolerance= 1e-6),
      all.equal(f.ML@min, -314.0143, tolerance=1e-6),
      !doExtras || ## Simulate MLE (= SMLE) is "extra" random,  hmm...
      all.equal(unname(coef(f.mlMC)), 17.8, tolerance= 0.01)
      ##           64-bit ubuntu: 17.817523
      ##         ? 64-bit Mac:    17.741
     )

cop@copula <- setTheta(cop@copula, theta)
r. <- curveLogL(cop, U., c(1, 200)) # => now looks fine

tail(as.data.frame(r.), 15)

##          x        y
## 87  172.14 2105.690
## 88  174.13 2143.642
## 89  176.12 2181.637
## 90  178.11 2219.675
## 91  180.10 2257.754
## 92  182.09 2295.874
## 93  184.08 2334.034
## 94  186.07 2372.232
## 95  188.06 2410.468
## 96  190.05 2448.742
## 97  192.04 2487.051
## 98  194.03 2525.396
## 99  196.02 2563.776
## 100 198.01 2602.189
## 101 200.00 2640.636

stopifnot( is.finite( r.$y ),
      ## and is convex (everywhere):
      diff(r.$y, d=2) > 0)
options(op) # revert to previous state

Session information

## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Fedora 34 (Thirty Four)
## 
## Matrix products: default
## BLAS:   /usr/local64.sfs/app/R/R-4.2.0-inst/lib/libRblas.so
## LAPACK: /usr/local64.sfs/app/R/R-4.2.0-inst/lib/libRlapack.so
## 
## attached base packages:
##  [1] parallel  grid      stats4    tools     stats     graphics  grDevices
##  [8] utils     datasets  methods   base     
## 
## other attached packages:
## [1] rugarch_1.4-8   gsl_2.1-7.1     mev_1.14        lattice_0.20-45
## [5] bbmle_1.0.25    copula_1.1-0   
## 
## loaded via a namespace (and not attached):
##  [1] zoo_1.8-10                  xfun_0.31                  
##  [3] bslib_0.3.1                 partitions_1.10-4          
##  [5] ks_1.13.5                   pcaPP_2.0-1                
##  [7] SkewHyperbolic_0.4-0        htmltools_0.5.2            
##  [9] gmp_0.6-5                   yaml_2.3.5                 
## [11] pracma_2.3.8                rlang_1.0.2                
## [13] jquerylib_0.1.4             stringr_1.4.0              
## [15] pspline_1.0-19              bdsmatrix_1.3-6            
## [17] mvtnorm_1.1-3               evaluate_0.15              
## [19] knitr_1.39                  fastmap_1.1.0              
## [21] ADGofTest_0.3               evd_2.3-6                  
## [23] highr_0.9                   xts_0.12.1                 
## [25] Rcpp_1.0.8.3                polynom_1.4-1              
## [27] KernSmooth_2.23-20          DistributionUtils_0.6-0    
## [29] jsonlite_1.8.0              truncnorm_1.0-8            
## [31] alabama_2022.4-1            spd_2.0-1                  
## [33] digest_0.6.29               stringi_1.7.6              
## [35] mathjaxr_1.6-0              numDeriv_2016.8-1.1        
## [37] Runuran_0.36                stabledist_0.7-1           
## [39] cli_3.3.0                   magrittr_2.0.3             
## [41] sass_0.4.1                  nleqslv_3.3.2              
## [43] Rsolnp_1.16                 GeneralizedHyperbolic_0.8-4
## [45] MASS_7.3-57                 Matrix_1.4-1               
## [47] rmarkdown_2.14              R6_2.5.1                   
## [49] mclust_5.4.10               compiler_4.2.0

Log-Likelihood Visualization for Archimedean Copulas

Marius Hofert and Martin Mächler

2022-06-14

Intro

Auxiliary functions

Joe’s family

Easy case (\(\tau=0.2\))

Harder case (\(d=150\), \(\tau=0.3\))

Even harder case (\(d=180\), \(\tau=0.4\))

Gumbel’s family

Easy case (\(\tau=0.2\))

Harder case (\(d=150\), \(\tau=0.6\))

Frank’s family (an already hard case)

Session information