#########################################################################
# Ridge regression and LASSO on Prostate Data
# Ridge using MASS::lm.ridge
# LASSO using glmnet package
# Splitting the data in half and modeling each half separately.
prostate <- read.table("C:\\Users\\Tom loughin\\sfuvault\\452 Statistical Learning\\R\\Prostate.csv",
header=TRUE, sep=",", na.strings=" ")
head(prostate)
# Splitting data in half using random uniform selection to make two "set"s.
set.seed(120401002)
set <- ifelse(runif(n=nrow(prostate))>0.5, yes=2, no=1)
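# Sanity check on the split: the counts per half will be only roughly
# 50/50, because the assignment is random
table(set)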
library(MASS) # for ridge
library(glmnet) # for LASSO
###############################################################
# Ridge regression using lm.ridge()
#
# Works much like lm(), but you need to specify lambda.
# You can generate a sequence of candidate lambda values with
# seq(from=, to=, by=), then use a version of cross-validation
# (here, generalized CV) to select the best one.
seq(from=1, to=10, by=1)
seq(1, 10, 1)
seq(0, 2, 0.2)
# In my experience, the optimal lambda can sometimes be < 1
# and sometimes as large as the upper double digits. Running a sequence
# from 0 to 100 by a small increment may be a little expensive,
# but it probably covers the range.
ridge1 <- lm.ridge(lpsa ~., lambda = seq(0, 100, .05), data=prostate[set==1,])
# Show coefficient path
plot(ridge1)
select(ridge1) # Reports the HKB and L-W estimates of lambda and the GCV minimizer
(coef.ri.best1 = coef(ridge1)[which.min(ridge1$GCV),]) # Coefficients at the GCV-minimizing lambda
# Unfortunately, there is no predict() function for lm.ridge
# Have to compute predicted values manually using matrix multiplication
# Formula is
# as.matrix(cbind(1,DATA)) %*% coef(RIDGE MODEL)
# where DATA is the X-data for which you want predicted values,
# and RIDGE MODEL is the lm.ridge object you have created.
# The "1" adds a column of 1's for the intercept term.
#
# Here I can use the optimal lambda to make predictions
# on test data (set==2)
# Select predictor columns by name so the indexing does not depend on
# where lpsa sits in the data frame
pred.ri1 = as.matrix(cbind(1, prostate[set==2, names(prostate) != "lpsa"])) %*% coef.ri.best1
MSPE1.ridge = mean((prostate$lpsa[set==2] - pred.ri1)^2)
# Compare MSPE to MSPE from lm
mod.lm1 = lm(lpsa~., data=prostate[set==1,])
pred.lm1 = predict(mod.lm1, newdata=prostate[set==2,])
MSPE1.lm = mean((prostate[set==2,9]-pred.lm1)^2)
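# A quick side-by-side look at the two test-set MSPEs from above
# (purely a convenience display; lower is better):
round(c(ridge = MSPE1.ridge, lm = MSPE1.lm), 4)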
############
# Repeat for other half of data
ridge2 <- lm.ridge(lpsa ~., lambda = seq(0, 100, .05), data=prostate[set==2,])
# Show coefficient path
plot(ridge2)
select(ridge2)
(coef.ri.best2 = coef(ridge2)[which.min(ridge2$GCV),])
# Again, there is no predict() method for lm.ridge, so compute the
# predicted values manually with matrix multiplication, as above.
#
# Here I can use the optimal lambda to make predictions
# on the held-out test data (set==1)
pred.ri2 = as.matrix(cbind(1, prostate[set==1, names(prostate) != "lpsa"])) %*% coef.ri.best2
MSPE2.ridge = mean((prostate$lpsa[set==1] - pred.ri2)^2)
# Compare MSPE to MSPE from lm
mod.lm2 = lm(lpsa~., data=prostate[set==2,])
pred.lm2 = predict(mod.lm2, newdata=prostate[set==1,])
MSPE2.lm = mean((prostate[set==1,9]-pred.lm2)^2)
# glmnet() requires x to be a matrix, so save out the response and the
# predictor columns separately (selected by name, so the code does not
# depend on where lpsa sits in the data frame, matching the ridge code).
# glmnet() standardizes the columns of x internally in its calculations,
# but presents the coefficients on the original, unscaled scale.
y.1 <- prostate$lpsa[set==1]
x.1 <- as.matrix(prostate[set==1, names(prostate) != "lpsa"])
y.2 <- prostate$lpsa[set==2]
x.2 <- as.matrix(prostate[set==2, names(prostate) != "lpsa"])
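# Side note: the internal scaling is controlled by glmnet()'s
# standardize argument (TRUE by default). If you ever wanted raw,
# unscaled fitting you could pass standardize=FALSE, e.g.
#   glmnet(y=y.1, x=x.1, family="gaussian", standardize=FALSE)
# but there is rarely a reason to turn it off.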
# Fit LASSO by glmnet(y=, x=). Gaussian is default, but other families are available
# Function produces series of fits for many values of lambda.
# First half of data
lasso.1 <- glmnet(y=y.1, x= x.1, family="gaussian")
#("%Dev" in output below is R-square in linear regression)
lasso.1
plot(lasso.1) # Plots coefficient path
##### Note that these are on the original scales, even though the LASSO scaled the variables
coef(lasso.1) # Lists out coefficients for each lambda
##### Note that glmnet() does not necessarily go all the way out to the LSEs,
##### but they are close.
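# To see how close the least-penalized end of the path gets to least
# squares, compare the coefficients at the smallest lambda on the path
# to an ordinary lm() fit (a quick sketch using only the objects above):
cbind(lasso = as.numeric(coef(lasso.1, s=min(lasso.1$lambda))),
      ls = coef(lm(y.1 ~ x.1)))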
# cv.glmnet() uses cross-validation to estimate the optimal lambda.
# We haven't talked about this yet, so don't worry about the details.
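# (Side note: the CV folds are chosen at random, so cv.glmnet() results
# can change from run to run; setting a seed right before the call
# makes the chosen lambda reproducible.)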
cv.lasso.1 <- cv.glmnet(y=y.1, x= x.1, family="gaussian")
cv.lasso.1
plot(cv.lasso.1) # Plot CV-MSPE
coef(cv.lasso.1) # Coefficients at the default s="lambda.1se"
coef(cv.lasso.1, s=cv.lasso.1$lambda.min) # Coefficients at the CV-minimizing lambda
# Using the "+1SE rule" (see later) produces a sparser solution
coef(cv.lasso.1, s=cv.lasso.1$lambda.1se) # Same as the default above
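# Counting the nonzero coefficients (excluding the intercept) at each
# lambda shows the sparsity difference directly:
sum(as.numeric(coef(cv.lasso.1, s="lambda.min"))[-1] != 0)
sum(as.numeric(coef(cv.lasso.1, s="lambda.1se"))[-1] != 0)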
# Predict the held-out half (set==2) using the first-half fit
pred.las1.min <- predict(cv.lasso.1, newx=x.2, s=cv.lasso.1$lambda.min)
pred.las1.1se <- predict(cv.lasso.1, newx=x.2, s=cv.lasso.1$lambda.1se)
pred.las1 <- predict(cv.lasso.1, newx=x.2) #1SE is the default!
MSPE1.las.min <- mean((y.2 - pred.las1.min)^2)
MSPE1.las.1se <- mean((y.2 - pred.las1.1se)^2)
# Repeat for second half of data
lasso.2 <- glmnet(y=y.2, x= x.2, family="gaussian")
#("%Dev" in output below is R-square in linear regression)
lasso.2
plot(lasso.2) # Plots coefficient path
##### Note that these are on the original scales, even though the LASSO scaled the variables
coef(lasso.2) # Lists out coefficients for each lambda
##### Note that glmnet() does not necessarily go all the way out to the LSEs,
##### but they are close.
# cv.glmnet() uses cross-validation to estimate the optimal lambda.
# We haven't talked about this yet, so don't worry about the details.
cv.lasso.2 <- cv.glmnet(y=y.2, x= x.2, family="gaussian")
cv.lasso.2
plot(cv.lasso.2) # Plot CV-MSPE
coef(cv.lasso.2) # Coefficients at the default s="lambda.1se"
coef(cv.lasso.2, s=cv.lasso.2$lambda.min) # Coefficients at the CV-minimizing lambda
# Using the "+1SE rule" (see later) produces a sparser solution
coef(cv.lasso.2, s=cv.lasso.2$lambda.1se) # Same as the default above
# Predict the held-out half (set==1) using the second-half fit
pred.las2.min <- predict(cv.lasso.2, newx=x.1, s=cv.lasso.2$lambda.min)
pred.las2.1se <- predict(cv.lasso.2, newx=x.1, s=cv.lasso.2$lambda.1se)
pred.las2 <- predict(cv.lasso.2, newx=x.1) #1SE is the default!
MSPE2.las.min <- mean((y.1 - pred.las2.min)^2)
MSPE2.las.1se <- mean((y.1 - pred.las2.1se)^2)
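# Gather all of the test-set MSPEs computed above into one table for
# easy comparison (purely a convenience summary of objects defined above)
mspe.summary <- rbind(
  half1.fit = c(lm = MSPE1.lm, ridge = MSPE1.ridge,
                lasso.min = MSPE1.las.min, lasso.1se = MSPE1.las.1se),
  half2.fit = c(lm = MSPE2.lm, ridge = MSPE2.ridge,
                lasso.min = MSPE2.las.min, lasso.1se = MSPE2.las.1se))
round(mspe.summary, 4)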