Today, I continued working through the Computing section of Chapter 6.
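These snippets assume the objects set up earlier in the chapter. A minimal sketch of that setup (I am assuming ctr1 is 10-fold cross-validation, as in the book):
library(AppliedPredictiveModeling)
library(caret)
data(solubility) #loads solTrainXtrans, solTrainY, solTestXtrans, solTestY
ctr1=trainControl(method = "cv", number = 10) #assumed: the book's 10-fold CV control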
Code (ordinary linear regression):
corthresh=0.9
toohigh=findCorrelation(cor(solTrainXtrans), corthresh)
corrpred=names(solTrainXtrans)[toohigh]
trainxfiltered=solTrainXtrans[, -toohigh]
testxfiltered=solTestXtrans[, -toohigh]
set.seed(100)
lmfiltered=train(x=trainxfiltered, y=solTrainY, method = "lm", trControl = ctr1)
lmfiltered
#solTrainY: 951 x 1; solTrainXtrans: 951 x 228; solTestXtrans: 316 x 228
#trainxfiltered: 951 x 190; testxfiltered: 316 x 190
#the code on p. 132 of Applied Predictive Modeling may be wrong
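To sanity-check the filtered model on the test set, something like this should work (a sketch; lmpred is my own name, and postResample is caret's helper for RMSE and R-squared):
lmpred=predict(lmfiltered, testxfiltered)
postResample(pred = lmpred, obs = solTestY)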
set.seed(100)
rlmpca=train(solTrainXtrans, solTrainY, method = "rlm", preProcess = "pca", trControl = ctr1)
#wait a minute
rlmpca
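Since rlm here is fit on PCA scores, it may be worth checking how many components the preprocessing kept. A sketch, assuming the usual slot on a caret train object:
rlmpca$preProcess #by default caret's pca keeps enough components for 95% of the variance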
Code (partial least squares):
library(pls)
#trainingdata was built earlier in the chapter: the transformed predictors plus the response
trainingdata=solTrainXtrans
trainingdata$solubility=solTrainY
plsfit=plsr(solubility~., data=trainingdata)
predict(plsfit, solTestXtrans[1:5,], ncomp = 1:2)
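To see how much variance each PLS component explains, the pls package has a summary method (a sketch):
summary(plsfit) #prints % variance explained in the predictors and in solubility, per component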
set.seed(100)
plstune=train(solTrainXtrans, solTrainY, method = "pls", tuneLength = 20, trControl = ctr1, preProcess = c("center", "scale"))
plstune
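To pick ncomp visually and then check the tuned fit on the test set, a sketch (plspred is my own name):
plot(plstune) #cross-validated RMSE against the number of components
plspred=predict(plstune, solTestXtrans)
postResample(pred = plspred, obs = solTestY)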
Code (penalized regression models):
library(elasticnet)
#create ridge-regression models
#the lambda argument specifies the ridge-regression penalty; as lambda decreases to 0, the solution approaches ordinary least squares
ridgemodel=enet(x=as.matrix(solTrainXtrans), y=solTrainY, lambda = 0.001)
ridgemodel
ridgepred=predict(ridgemodel, newx = as.matrix(solTestXtrans), s=1, mode="fraction", type = "fit")
ridgepred
head(ridgepred$fit)
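elasticnet can also plot the whole coefficient path of this fit; a sketch, assuming the default plot method for enet objects:
plot(ridgemodel) #standardized coefficients along the regularization path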
#define the candidate set of values
ridgegrid=data.frame(.lambda=seq(0, 0.1, length=15))
set.seed(100)
ridgeregfit=train(solTrainXtrans, solTrainY, method="ridge", tuneGrid=ridgegrid, trControl=ctr1, preProcess=c("center", "scale"))
ridgeregfit
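Plotting the resampling profile shows how RMSE changes across the lambda grid (a sketch):
plot(ridgeregfit)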
enetmodel=enet(x=as.matrix(solTrainXtrans), y=solTrainY, lambda = 0.01, normalize = TRUE)
#normalize centers and scales the predictors prior to modeling
#lambda controls the ridge-regression penalty; setting it to 0 fits a pure lasso model
enetpred=predict(enetmodel, newx = as.matrix(solTestXtrans), s=0.1, mode="fraction", type="fit")
names(enetpred)
head(enetpred$fit)
enetcoef=predict(enetmodel, newx = as.matrix(solTestXtrans), s=0.1, mode="fraction", type="coefficients")
tail(enetcoef$coefficients)
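Because the lasso penalty zeroes out coefficients, a quick count shows how sparse the solution at s=0.1 is (a sketch; 228 is the full predictor count noted above):
sum(enetcoef$coefficients != 0) #number of predictors kept out of 228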
#other packages: biglars (for large data sets), FLLat (for the fused lasso), grplasso (for the group lasso), penalized, relaxo (for the relaxed lasso)...
enetgrid=expand.grid(.lambda=c(0, 0.01, 0.1), .fraction=seq(0.05, 1, length=20))
set.seed(100)
enetTune=train(solTrainXtrans, solTrainY, method = "enet", tuneGrid = enetgrid, trControl = ctr1, preProcess = c("center", "scale"))
enetTune
plot(enetTune)
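Finally, the tuned elastic net can be checked on the test set the same way (a sketch; enetpred2 is my own name):
enetpred2=predict(enetTune, solTestXtrans)
postResample(pred = enetpred2, obs = solTestY)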
Tomorrow, I will do the exercises of Chapter 6.