[SOLVED] CS # The aim of this session is to compare different models


# The aim of this session is to compare different models
# and to select the best one able to predict variable y

# we first load the dataset
menti = read.csv("menti_200_full.csv")

# before calibrating the models it is always
# a good idea to output summary statistics
library(stargazer)
stargazer(menti, type = "text")

# you can also output several plots to check
# if there are any outliers or the correlation and
# dependence between variables
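# a minimal sketch of such checks (assuming all columns of menti are
# numeric): the scatterplot matrix and correlation matrix help spot
# outliers and strong dependence between predictors
pairs(menti)
cor(menti)
boxplot(menti$y, main = "y")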

# we then need to split the data between train and test
# train data will be used to calibrate the models
# I am using 75% of the sample as train
idx_train = sample(1:400, size = 300)
menti_train = menti[idx_train, ]

# the remaining 25% will be used
# to compare out-of-sample performance
menti_test = menti[-idx_train, ]

# we can start by estimating an ols model
ols_mod1 = lm(y ~ ., data = menti_train)
stargazer(ols_mod1, type = "text")

# and we can construct our performance measures
# with the predict function
ols_pred1 = predict(ols_mod1, newdata = menti_test)

# which of these two measures penalises large errors
# more than small ones?
# RMSE
sqrt(mean((ols_pred1 - menti_test$y)^2))
# MAE
mean(abs(ols_pred1 - menti_test$y))

# how would you construct in R a performance measure
# which uses absolute errors but penalises negative
# errors twice as much as positive ones?
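# one possible sketch (assuming the error is defined as prediction
# minus actual; flip the sign if you define it the other way round):
err = ols_pred1 - menti_test$y
mean(ifelse(err < 0, 2 * abs(err), abs(err)))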

# another useful model is the Elastic-Net
# from package glmnet
library(glmnetUtils)

# E-Net has two params: alpha and lambda
# alpha is set by the user and it is between 0 and 1
# alpha = 1 (default) gives the LASSO
# alpha = 0 gives the RIDGE
# any value in between is for a combination of the two
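# for illustration (extra calls, not part of the original exercise):
# alpha = 0 gives a ridge fit, alpha = 0.5 an even mix of the two
ridge_mod = glmnet(y ~ ., data = menti_train, alpha = 0)
mix_mod = glmnet(y ~ ., data = menti_train, alpha = 0.5)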

# which model are we estimating here?
enet_mod2 = glmnet(y ~ ., data = menti_train)

# stargazer does not support glmnet objects yet, so this would error:
# stargazer(enet_mod2, type = "text")

# we can still use predict to compute out-of-sample performance
enet_pred2 = predict(enet_mod2, newdata = menti_test)

# which model between OLS and the E-Net performs better?

# RMSE
sqrt(mean((enet_pred2 - menti_test$y)^2))
# MAE
mean(abs(enet_pred2 - menti_test$y))
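# note: without an explicit lambda, predict() returns one column per
# value on the lambda path, so the measures above average over the
# whole path; to evaluate a single lambda, pass s = ...
# (the value 0.1 below is purely illustrative)
enet_pred2_s = predict(enet_mod2, newdata = menti_test, s = 0.1)
sqrt(mean((enet_pred2_s - menti_test$y)^2))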

# you can use function cv.glmnet to optimize lambda by
# cross validation but you still need to provide alpha
cv_enet_mod1 = cv.glmnet(x = as.matrix(menti_train[,-1]),
                         y = as.matrix(menti_train[,1]),
                         alpha = 0.5)

# therefore you need to use a loop over different alphas
# to pick the best one
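# a minimal sketch of such a loop (the alpha grid below is an
# arbitrary choice, and s = "lambda.min" is one possible rule):
for (a in c(0, 0.25, 0.5, 0.75, 1)) {
  cv_fit = cv.glmnet(x = as.matrix(menti_train[,-1]),
                     y = as.matrix(menti_train[,1]),
                     alpha = a)
  cv_pred = predict(cv_fit,
                    newx = as.matrix(menti_test[,-1]),
                    s = "lambda.min")
  cat("alpha =", a, "RMSE =", sqrt(mean((cv_pred - menti_test$y)^2)), "\n")
}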

# predictions from the alpha = 0.5 fit at the cross-validated lambda
cv_enet_pred1 = predict(cv_enet_mod1,
                        newx = as.matrix(menti_test[,-1]),
                        s = "lambda.min")
# RMSE
sqrt(mean((cv_enet_pred1 - menti_test$y)^2))
# MAE
mean(abs(cv_enet_pred1 - menti_test$y))

# cva.glmnet will apply cv.glmnet for different alphas
# so that you can do the comparison afterwards

cva_enet_mod1 = cva.glmnet(x = as.matrix(menti_train[,-1]),
                           y = as.matrix(menti_train[,1]))

for (alpha in cva_enet_mod1$alpha) {
  cva_enet_pred1 = predict(cva_enet_mod1,
                           newx = as.matrix(menti_test[,-1]),
                           alpha = alpha)
  cat(alpha, sqrt(mean((cva_enet_pred1 - menti_test$y)^2)), "\n")
}
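# a possible follow-up (illustrative, not part of the original script):
# collect the RMSE for each alpha and keep the best one
rmse_by_alpha = sapply(cva_enet_mod1$alpha, function(a) {
  p = predict(cva_enet_mod1, newx = as.matrix(menti_test[,-1]), alpha = a)
  sqrt(mean((p - menti_test$y)^2))
})
best_alpha = cva_enet_mod1$alpha[which.min(rmse_by_alpha)]
best_alpha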
