#############################################################################
# Example script
# Demonstrates the penalized S-estimators proposed in the paper
# "S-Estimation for Penalized Regression Splines", published in the
# Journal of Computational and Graphical Statistics, by
# Kukatharmini Tharmaratnam, Gerda Claeskens, Christophe Croux, and Matias
# Salibian-Barrera.
# The script also compares the fit with penalized least squares and
# penalized M-estimation.
#############################################################################
source('pen-s-functions.R')
set.seed(17)
n <- 100 # sample size
NN <- 500 # max. no. of iterations for S-estimator
NNN <- 500 # no. of initial candidates for the S-estimator
p0 <- .25 # probability that each observation is replaced by an outlier
y0 <- 20 # mean of the outlier distribution
sd0 <- 2 # standard deviation of the outlier distribution
# grid of penalty parameters over which the (robust) GCV / CV criteria search
lambdas <- seq(.1, 2, length=100)
# tuning constants for the S-estimator (Tukey's bisquare loss with a 50%
# breakdown point)
cc <- 1.54764
b <- .5
# Cubic polynomials
p <- 3
# explanatory variables (x's)
x <- sort(runif(n,min=-1,max=1))
# true mean function
muf <- sin(pi*x)
# build the design matrix
num.knots <- max(5,min(floor(length(unique(x))/4),35))
knots <- quantile(unique(x),seq(0,1,length=num.knots+2))[-c(1,(num.knots+2))]
xpoly <- rep(1,n)
for (j in 1:p) xpoly <- cbind(xpoly,x^j)
xspline <- outer(x,knots,"-")
xspline <- pmax(xspline, 0)^p
X <- cbind(xpoly,xspline)
# penalty matrix
D <- diag(c(rep(0,ncol(xpoly)),rep(1,ncol(xspline))))
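# Note: the first ncol(xpoly) columns of X (the polynomial part) are left
# unpenalized, while the truncated-power spline terms receive a ridge-type
# penalty, so each estimator below minimizes a measure of the residuals
# (squared error, M-loss, or a robust M-scale) plus
# lambda * t(beta) %*% D %*% beta.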
# error
e <- rnorm(n, mean=0, sd=.7)
# response variable
y <- muf + e
# replace a random subset of the responses (each with probability p0) with outliers
index <- which(runif(n) < p0)
y[index] <- rnorm(length(index),y0,sd0)
# plot the data
plot(x,y, main='Comparison of penalized regression estimators')
# add the true mean function to the plot
lines(x, muf, lwd=3, col='black')
# penalized LS fit (with GCV)
tmp.ls <- pen.ls.gcv(y, X, D, lambdas)
# add the penalized LS estimate to the plot
lines(x, tmp.ls$yhat, lwd=3, col='red')
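# For intuition only: for a single fixed lambda, the penalized LS fit has the
# usual ridge-type closed form (X'X + lambda*D)^{-1} X'y.  The helper below
# ('pen.ls.fixed' is a name made up here; it is not part of pen-s-functions.R)
# is a minimal sketch of that formula, whereas pen.ls.gcv() above additionally
# selects lambda over the grid by generalized cross-validation.
pen.ls.fixed <- function(y, X, D, lambda) {
  beta <- solve(crossprod(X) + lambda * D, crossprod(X, y))
  drop(X %*% beta) # fitted values
}
# e.g.: yhat1 <- pen.ls.fixed(y, X, D, lambda=1)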
# penalized M-estimator (with robust CV)
tmp.m <- pen.m.rcv(y=y, X=X, N=NN, D=D, lambdas=lambdas, num.knots=num.knots, p=p, epsilon=1e-6)
# add the penalized M estimate to the plot
lines(x, tmp.m$yhat, lwd=3, col='green')
# penalized S-estimator (with robust GCV)
tmp.s <- pen.s.rgcv(y=y, X=X, D=D, lambdas=lambdas, num.knots=num.knots, p=p, NN=NN, cc=cc, b=b, NNN=NNN)
# add the penalized S estimate to the plot
lines(x, tmp.s$yhat, lwd=3, col='blue')
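# Unlike the LS and M fits above, the penalized S-estimator minimizes a robust
# M-scale of the residuals plus the penalty lambda * t(beta) %*% D %*% beta,
# which keeps the fit close to the true mean function even under heavy
# contamination; lambda is chosen by a robust GCV criterion.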
legend(-.75, 15, legend=c("LS", "M", "S", "True"),
col=c('red', 'green', 'blue', 'black'), lwd=3)
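# Optional numerical check (not part of the original example): compare each
# fit with the true mean function via mean squared error.  With outliers
# present, the M and S fits are expected to track muf more closely than
# penalized LS.
mse <- function(fit) mean((fit - muf)^2)
round(c(LS = mse(tmp.ls$yhat), M = mse(tmp.m$yhat), S = mse(tmp.s$yhat)), 4)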