Introduction

This file contains some examples of the functions related to the Wrap routine. More specifically the functions wrap and estLocScale.

library("cellWise")

Simple example: Stars data

# load the data

X <- robustbase::starsCYG; plot(X)

plot of chunk unnamed-chunk-3

cutoff <- sqrt(qchisq(0.975, ncol(X)))
X.mean <- colMeans(X)
X.cov  <- cov(X)
cor(X)

##               log.Te  log.light
## log.Te     1.0000000 -0.2104133
## log.light -0.2104133  1.0000000

locScaleX <- estLocScale(X)
Xw <- wrap(X, locScaleX$loc, locScaleX$scale)$Xw
Xw.mean <- colMeans(Xw)
Xw.cov <- cov(Xw)
cor(Xw)

##           [,1]      [,2]
## [1,] 1.0000000 0.5732006
## [2,] 0.5732006 1.0000000

# classical Mahalanobis distances
# 

md <- sqrt(mahalanobis(X, center = X.mean, cov = X.cov))
plot(1:nrow(X), md, pch = 16, cex = 0.9,
     ylab = "Mahalanobis distance",
     xlab = "Index",
     main = "Mahalanobis distances",
     xlim = c(0, nrow(X)), ylim = c(0, max(md, cutoff) + 0.2), col = "black")
abline(h = cutoff, lwd = 1.5, col = "red")

plot of chunk unnamed-chunk-3

# wrapping-based robust distances
# 

rd <- sqrt(mahalanobis(X, center = Xw.mean, cov = Xw.cov))
plot(1:nrow(X), rd, pch = 16, cex = 0.9,
     ylab = "robust distance",
     xlab = "Index",
     main = "Wrapping-based robust distances",
     xlim = c(0, nrow(X)), ylim = c(0, max(rd, cutoff) + 0.2), col = "black")
abline(h = cutoff, lwd = 1.5, col = "red")

plot of chunk unnamed-chunk-3

# classical and wrapped tolerance ellipse
#

ell1 <- ellipse::ellipse(X.cov, centre = X.mean, t = cutoff, npoints = 120)
ell2 <- ellipse::ellipse(Xw.cov, centre = Xw.mean, t = cutoff, npoints = 120)
xmin <- min(min(X[, 1]), min(ell1[, 1]), min(ell2[, 1]))
xmax <- max(max(X[, 1]), max(ell1[, 1]), max(ell2[, 1]))
ymin <- min(min(X[, 2]), min(ell1[, 2]), min(ell2[, 2]))
ymax <- max(max(X[, 2]), max(ell1[, 2]), max(ell2[, 2]))
myxlim <- c(xmin, xmax); myylim <- c(ymin, ymax)
plot(X, xlab = "X1", ylab = "X2", main = "Stars: classical and wrapping ellipse",
     pch = 16, col = "black", xlim = myxlim, ylim = myylim)
points(ell1, type = "l", lwd = 1.5, col = "red")
points(ell2, type = "l", lwd = 1.5, col = "blue")

plot of chunk unnamed-chunk-3

Bushfire dataset: a 5-dimensional example

?robustbase::bushfire
X <- as.matrix(robustbase::bushfire); pairs(X)

plot of chunk unnamed-chunk-4

cutoff <- sqrt(qchisq(0.975, ncol(X))); cutoff

## [1] 3.582248

X.mean <- colMeans(X); X.cov <- cov(X)
round(cor(X), 2)

##       V1    V2    V3    V4    V5
## V1  1.00  0.80 -0.59 -0.49 -0.49
## V2  0.80  1.00 -0.53 -0.53 -0.52
## V3 -0.59 -0.53  1.00  0.97  0.98
## V4 -0.49 -0.53  0.97  1.00  1.00
## V5 -0.49 -0.52  0.98  1.00  1.00

estX <- cellWise::estLocScale(X)
Xw <- cellWise::wrap(X, estX$loc, estX$scale)$Xw
Xw.mean <- colMeans(Xw); Xw.cov <- cov(Xw)
round(cor(Xw), 2)

##       [,1]  [,2]  [,3]  [,4]  [,5]
## [1,]  1.00  0.78 -0.62 -0.52 -0.53
## [2,]  0.78  1.00 -0.25 -0.14 -0.15
## [3,] -0.62 -0.25  1.00  0.98  0.99
## [4,] -0.52 -0.14  0.98  1.00  1.00
## [5,] -0.53 -0.15  0.99  1.00  1.00

wd <- sqrt(mahalanobis(X, center = Xw.mean, cov = Xw.cov))
weights <- (wd < cutoff) + 0
# Reweighting functions:
wmean <- function(X, w) { # weighted mean
  wmean1 <- function(y, w) { sum(y * w) / sum(w) }
  apply(X, 2, FUN = wmean1, w = w)
}
wcov <- function(X, w) { # weighted cov
  Xc <- sweep(X, 2, wmean(X, w), "-")
  Xc <- sweep(Xc, 1, sqrt(w), "*")
  t(Xc) %*% Xc / sum(w)
}
X.wmean <- wmean(X, weights)
X.wcov <- wcov(X, weights)
round(cov2cor(X.wcov), 2)

##       V1    V2    V3    V4    V5
## V1  1.00  0.86 -0.74 -0.62 -0.63
## V2  0.86  1.00 -0.34 -0.19 -0.20
## V3 -0.74 -0.34  1.00  0.97  0.98
## V4 -0.62 -0.19  0.97  1.00  1.00
## V5 -0.63 -0.20  0.98  1.00  1.00

# Classical Mahalanobis distances
# 
md <- sqrt(mahalanobis(X, center = X.mean, cov = X.cov))
plot(1:nrow(X), md, pch = 16, cex = 0.9,
     ylab = "Mahalanobis distance", xlab = "Index",
     main = "Mahalanobis distances", xlim = c(0, nrow(X)),
     ylim = c(0, max(md, cutoff) + 0.2), col = "black")
abline(h = cutoff, lwd = 1.5, col = "red")

plot of chunk unnamed-chunk-4

# No outliers stand out here.


# Wrapping-based robust distances
# 
rwd <- sqrt(mahalanobis(X, center = X.wmean, cov = X.wcov))
plot(1:nrow(X),rwd,pch = 16, cex = 0.9,
     ylab = "robust distance", xlab = "Index",
     main = "Wrapping-based robust distances", xlim = c(0, nrow(X)),
     ylim = c(0, max(rwd, cutoff) + 0.2), col = "black")
abline(h = cutoff, lwd = 1.5, col = "red")

plot of chunk unnamed-chunk-4

# Many outliers are visible, the same as in Maronna-Yohai 1995.


# Distance-distance plot:
#
plot(md, rwd, pch = 16, cex = 0.9, ylab = "robust distance",
     xlab = "classical distance", main = "distance-distance plot",
     xlim = c(0, max(md, cutoff) + 0.2),
     ylim = c(0, max(rwd, cutoff) + 0.2), col = "black")
abline(v = cutoff, lwd = 1.5, col = "red")
abline(h = cutoff, lwd = 1.5, col = "red")
abline(0, 1, lwd = 1.5, lty = 2)

plot of chunk unnamed-chunk-4

# only the robust distances detect the outliers


# Classical and wrapped tolerance ellipses:
#
coordgrid <- expand.grid(1:dim(X)[2], 1:dim(X)[2])
coordgrid <- coordgrid[-(1 + (1:dim(X)[2] - 1) * (dim(X)[2] + 1)), ]
i = 1
pairs(X, panel = function(x, y) {
        points(x, y)
        inds <- as.numeric(coordgrid[i, ])
        ell1 <- ellipse::ellipse(X.cov[inds, inds], centre = X.mean[inds],
                                 t = cutoff, npoints = 120)
        ell2 <- ellipse::ellipse(X.wcov[inds, inds], centre = X.wmean[inds],
                                 t = cutoff, npoints = 120)
        points(ell1, type = "l", lwd = 1.5, col = "red")
        points(ell2, type = "l", lwd = 1.5, col = "blue")
        i <<- i + 1
      })

plot of chunk unnamed-chunk-4

Dog walker video example

data(data_dogWalker)
# keep a copy of the data for later:
X <- data_dogWalker

# Combine all pixels of an image in a row:
dims <- dim(X); dims # 54 144 180   3

## [1]  54 144 180   3

dim(X) <- c(dims[1], prod(dims[2:4]))
dim(X) #  54 77760

## [1]    54 77760

# Estimate location and scale using estLocScale:
locScaleX <- estLocScale(X, precScale = 1e-12)

## Warning in estLocScale(X, precScale = 1e-12): 85  out of  77760  variables have an estimated scale <= 
## "precScale" =  1e-12 .

# Preprocess the data to deal with zeroscales:
zeropix <- which(locScaleX$scale < 1e-12)
nzero <- length(zeropix) 
filler <- 2 * ((1:dims[1]) - (dims[1] + 1) / 2) / (dims[1] - 1)
filler <- 0.9 / 255 * matrix(rep(filler, nzero), ncol = nzero, byrow = F)
centers <- apply(X[, zeropix], 2, FUN = median)
centers <- pmax(0, pmin(1, centers)); 
filler <- sweep(filler, 2, centers, "+")
X[, zeropix] <- filler
locScaleX$scale[zeropix] <- estLocScale(X[, zeropix])$scale

# Wrap the data
Xw <- wrap(X, locScaleX$loc, locScaleX$scale)$Xw

# center the wrapped data
aveXw <- colMeans(Xw)
XwC <- sweep(Xw, 2, aveXw)

# Compute singular value decomposition of centered wrapped data
prcXwz <- svd::propack.svd(XwC, neig = 3) 
rloadings <- prcXwz$v; dim(rloadings)

## [1] 77760     3

Xwscores <- XwC %*% rloadings; dim(Xwscores)

## [1] 54  3

## Project original data on robust loadings:
XC <- sweep(X, 2, aveXw)
robscores <- XC %*% rloadings

# Compute the fitted frames and residuals:
Xznew.rfitted <- robscores %*% t(rloadings[, 1:3])
X.rfitted <- sweep(Xznew.rfitted, 2, aveXw, "+")
X.rresidual <- X - X.rfitted

# Plot the first loading
rim1 <- pmax(0, pmin(rloadings[, 1] + locScaleX$loc, 1))
dim(rim1) <- dims[2:4]; grid::grid.newpage()
grid::grid.raster(rim1, interpolate = FALSE)

plot of chunk unnamed-chunk-5

# Compute location and scale of the residuals
res.med <- median(X.rresidual) 
res.mad <-  mad(X.rresidual) 
res <- (X.rresidual - res.med) / res.mad

# Make plots
frameinds <- c(10, 20, 30, 40)
cutoff <- 200
for (i in 1:4) { 
  index <- frameinds[i]
  restemp <- res[index,];
  dim(restemp) <- c(prod(dims[2:3]), dims[4]);
  res2 <- restemp[, 1]^2 + restemp[, 2]^2 + restemp[, 3]^2
  mask <- rep(0, prod(dims[2:3])); mask[res2 > cutoff] <- 1
  mask <- cbind(mask,mask,mask);   dim(mask) <- dims[2:4]
  realim <- data_dogWalker[index, , , ]; 
  realim <- pmax(0, pmin(realim, 1)); dim(realim) <- dims[2:4]
  masked <- realim * mask; dim(masked)
  masked <- pmax(0, pmin(masked, 1)); dim(masked) <- dims[2:4]
  masked[masked == 0] <- 0.5 
  finalim <- array(0, dim = c(dims[2], 2*dims[3], dims[4]))
  finalim[, 1:dims[3], ] <- realim
  finalim[, (dims[3] + 1):(2 * dims[3]), ] <- masked
  grid::grid.newpage()
  grid::grid.raster(finalim,interpolate = FALSE)
}

plot of chunk unnamed-chunk-5