set.seed(345678) n <- 50 x1 <- rnorm(n) x2 <- rnorm(n) x3 <- rnorm(n) u <- rnorm(n,0,sqrt(2)) d <- 30 z <- matrix(runif(n*d),n) y <- 1+0.5*x1+2*x2-0.2*x3+u app <- data.frame(y=y,x1=x1,x2=x2,x3=x3,z=z) names(app)[-(1:4)] <- paste("z",1:30,sep="") xapp.scale <- scale(app[,-1]) app.scale <- data.frame(y=y,xapp.scale) ntest <- 1000 x1test <- rnorm(ntest) x2test <- rnorm(ntest) x3test <- rnorm(ntest) utest <- rnorm(ntest,0,sqrt(2)) ztest <- matrix(runif(ntest*d),ntest) test <- data.frame(x1=x1test,x2=x2test,x3=x3test,z=ztest) names(test)[-(1:3)] <- paste("z",1:30,sep="") ytest <- 1+0.5*x1test+2*x2test-0.2*x3test+utest test.scale <- data.frame(scale(test,center=attr(xapp.scale,"scaled:center"),scale=attr(xapp.scale,"scaled:scale"))) model1 <- lm(y~.,data=app) yhat <- predict(model1,test) sqrt(mean((yhat-ytest)^2)) model1.scale <- lm(y~.,data=app.scale) yhat <- predict(model1.scale,test.scale) sqrt(mean((yhat-ytest)^2)) model2.scale <- lm(y~x1+x2+x3,data=app.scale) yhat <- predict(model2.scale,test.scale) sqrt(mean((yhat-ytest)^2)) yoracle <- 1+0.5*test$x1+2*test$x2-0.2*test$x3 sqrt(mean((yoracle-ytest)^2)) library(glmnet) library(plotmo) model.lasso <- glmnet(xapp.scale,y,family="gaussian",nlambda=50,alpha=1) model.lasso plot_glmnet(model.lasso,label=5) yhat <- predict(model.lasso,as.matrix(test.scale)) dim(yhat) library(caret) model.lasso.tune <- train(xapp.scale,y,method="glmnet",metric="RMSE", trControl=trainControl(method="repeatedcv", number=5,repeats=100), tuneGrid=data.frame(alpha=1,lambda=model.lasso$lambda)) lambda_opt_lasso <- as.numeric(model.lasso.tune$bestTune[2]) model.lasso.tune$results[model.lasso.tune$results[,2]==lambda_opt_lasso,] glmnet(x,y,family="gaussian",lambda=lambda_opt_lasso,alpha=1)$beta plot_glmnet(model.lasso,label=5,s=lambda_opt_lasso) yhat <- predict(model.lasso.tune,as.matrix(test.scale)) length(yhat) sqrt(mean((yhat-ytest)^2)) model.ridge <- glmnet(xapp.scale,y,family="gaussian",nlambda=50,alpha=0) model.ridge plot_glmnet(model.ridge,label=5) model.ridge.tune <- train(xapp.scale,y,method="glmnet",metric="RMSE", trControl=trainControl(method="repeatedcv", number=5,repeats=100), tuneGrid=data.frame(alpha=0,lambda=model.ridge$lambda)) model.ridge.tune$results[model.ridge.tune$results[,2]==as.numeric(model.ridge.tune$bestTune[2]),] lambda_opt_ridge <- as.numeric(model.ridge.tune$bestTune[2]) model.ridge.tune$results[model.ridge.tune$results[,2]==lambda_opt_ridge,] glmnet(x,y,family="gaussian",lambda=lambda_opt_ridge,alpha=0)$beta plot_glmnet(model.ridge,label=5,s=lambda_opt_ridge) yhat <- predict(model.ridge.tune,test.scale) length(yhat) sqrt(mean((yhat-ytest)^2))