#############################################################################
# Example script
# Demonstrates the penalized S-estimators proposed in the paper
# "S-Estimation for Penalized Regression Splines", published in the
# Journal of Computational and Graphical Statistics, by
# Kukatharmini Tharmaratnam, Gerda Claeskens, Christophe Croux, and Matias
# Salibian-Barrera.
# The script also compares the fit with penalized least squares and
# penalized M-estimation.
#############################################################################
source('pen-s-functions.R')
set.seed(17)
n <- 100 # sample size
NN <- 500 # max. no. of iterations for S-estimator
NNN <- 500 # no. of initial candidates for the S-estimator
p0 <- .25 # probability that each observation is replaced by an outlier
y0 <- 20 # mean of the outlier distribution
sd0 <- 2 # standard deviation of the outlier distribution
# grid of penalty parameters over which the (robust) GCV / CV criteria search
lambdas <- seq(.1, 2, length=100)
# tuning constants for the S-estimator (Tukey's bisquare loss with a 50%
# breakdown point)
cc <- 1.54764
b <- .5
# Cubic polynomials
p <- 3
# explanatory variables (x's)
x <- sort(runif(n,min=-1,max=1))
# true mean function
muf <- sin(pi*x)
# build the design matrix
num.knots <- max(5,min(floor(length(unique(x))/4),35))
knots <- quantile(unique(x),seq(0,1,length=num.knots+2))[-c(1,(num.knots+2))]
xpoly <- rep(1,n)
for (j in 1:p) xpoly <- cbind(xpoly,x^j)
xspline <- outer(x,knots,"-")
xspline <- pmax(xspline, 0)^p
X <- cbind(xpoly,xspline)
# penalty matrix
D <- diag(c(rep(0,ncol(xpoly)),rep(1,ncol(xspline))))
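# Note: the first ncol(xpoly) columns of X (the polynomial part) are left
# unpenalized, while the truncated-power spline terms receive a ridge-type
# penalty, so each estimator below minimizes a measure of the residuals
# (squared error, M-loss, or a robust M-scale) plus
# lambda * t(beta) %*% D %*% beta.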
# error
e <- rnorm(n, mean=0, sd=.7)
# response variable
y <- muf + e
# replace a random subset of the responses (each with probability p0) with outliers
index <- which(runif(n) < p0)
y[index] <- rnorm(length(index),y0,sd0)
# plot the data
plot(x,y, main='Comparison of penalized regression estimators')
# add the true mean function to the plot
lines(x, muf, lwd=3, col='black')
# penalized LS fit (with GCV)
tmp.ls <- pen.ls.gcv(y, X, D, lambdas)
# add the penalized LS estimate to the plot
lines(x, tmp.ls$yhat, lwd=3, col='red')
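# For intuition only: for a single fixed lambda, the penalized LS fit has the
# usual ridge-type closed form (X'X + lambda*D)^{-1} X'y.  The helper below
# ('pen.ls.fixed' is a name made up here; it is not part of pen-s-functions.R)
# is a minimal sketch of that formula, whereas pen.ls.gcv() above additionally
# selects lambda over the grid by generalized cross-validation.
pen.ls.fixed <- function(y, X, D, lambda) {
  beta <- solve(crossprod(X) + lambda * D, crossprod(X, y))
  drop(X %*% beta) # fitted values
}
# e.g.: yhat1 <- pen.ls.fixed(y, X, D, lambda=1)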
# penalized M-estimator (with robust CV)
tmp.m <- pen.m.rcv(y=y, X=X, N=NN, D=D, lambdas=lambdas, num.knots=num.knots, p=p, epsilon=1e-6)
# add the penalized M estimate to the plot
lines(x, tmp.m$yhat, lwd=3, col='green')
# penalized S-estimator (with robust GCV)
tmp.s <- pen.s.rgcv(y=y, X=X, D=D, lambdas=lambdas, num.knots=num.knots, p=p, NN=NN, cc=cc, b=b, NNN=NNN)
# add the penalized S estimate to the plot
lines(x, tmp.s$yhat, lwd=3, col='blue')
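# Unlike the LS and M fits above, the penalized S-estimator minimizes a robust
# M-scale of the residuals plus the penalty lambda * t(beta) %*% D %*% beta,
# which keeps the fit close to the true mean function even under heavy
# contamination; lambda is chosen by a robust GCV criterion.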
legend(-.75, 15, legend=c("LS", "M", "S", "True"),
col=c('red', 'green', 'blue', 'black'), lwd=3)
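# Optional numerical check (not part of the original example): compare each
# fit with the true mean function via mean squared error.  With outliers
# present, the M and S fits are expected to track muf more closely than
# penalized LS.
mse <- function(fit) mean((fit - muf)^2)
round(c(LS = mse(tmp.ls$yhat), M = mse(tmp.m$yhat), S = mse(tmp.s$yhat)), 4)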