Commit 847009aa authored by Johannes Bleher's avatar Johannes Bleher
Browse files

Inclass p3 push to repo

parent 36b8ffde
Loading
Loading
Loading
Loading
+11 −0
Original line number Diff line number Diff line
@@ -34,3 +34,14 @@ knnFit <- train ( as.factor(indicator ) ~ . ,
                  trControl = ctrl ,
                  preProcess = c("center" ,"scale") ,
                  tuneLength = 10)


# Model evaluation ----
# Score the fitted kNN model (`knnFit`) on the hold-out validation set.

# Predictor columns only: drop the file identifier and the response.
XX_valid <- subset(validSet, select = -c(file, indicator))

# Transform the validation predictors with the previously fitted `pcs` object.
# NOTE(review): `pcs` is defined outside this excerpt; presumably it is the
# transformation fitted on the training predictors - confirm.
pcs_new <- predict(pcs, newdata = XX_valid)

# Predicted class labels; the model was trained on as.factor(indicator), so
# this is a factor.
pred_age <- predict(knnFit, newdata = pcs_new)

# Convert the factor back to its numeric labels through the level text.
# as.numeric() on a factor returns the internal level codes (1, 2, ...), so
# `as.numeric(pred_age) - 1` only matches the labels for a 0/1 coding.
# NOTE(review): assumes `indicator` holds numeric labels - confirm.
pred_label <- as.numeric(as.character(pred_age))

# Predictions next to the observed outcomes, one row per validation case.
prd_outcome_comparison <- cbind(
  predicted = pred_label,
  observed = validSet$indicator
)
+43 −0
Original line number Diff line number Diff line
# Lasso-penalised classification of `indicator` (glmnet fitted through caret):
# standardise the predictors, tune lambda by cross-validation, predict the
# validation set and list the largest coefficients.

# Start from a clean session: close the active graphics device, if there is
# one, and remove every object from the workspace.
if (length(dev.list()) > 0) {
  dev.off()
}
rm(list = ls())

library("caret")
library("data.table")

# Restore the objects saved in the .Rdata file; the rest of the script relies
# on it providing `trainSet` and `validSet`.
load("01_data/ML_data.Rdata")


# Standardisation ----
# Learn the centring and scaling parameters from the training predictors only;
# `indicator` (the response) and `file` are left out of the estimation.
train_preProcess <- preProcess(
  trainSet[, -c("indicator", "file")],
  method = c("center", "scale")
)

# Apply the *training* parameters to both data sets. Standardising the
# validation set with its own means and standard deviations would put it on a
# different scale from the one the model is fitted on and would use
# validation-set information during pre-processing.
# `indicator` stays in the input because later steps read it from the
# standardised tables; it was not part of the fitted pre-processing.
trainSet_stdz <- predict(train_preProcess, trainSet[, -c("file")])
validSet_stdz <- predict(train_preProcess, validSet[, -c("file")])


# Model fitting ----
# 10-fold cross-validation with a progress log. The argument is spelled out as
# `verboseIter`: trainControl() has no `verbose` argument, so the short form
# only worked through R's partial argument matching.
ctrl <- trainControl(method = "cv", number = 10, verboseIter = TRUE)

# alpha = 1 selects the pure lasso penalty in glmnet; the lambda candidates
# run from 0.1 to 1 in steps of 0.1.
tuneGrid <- expand.grid(alpha = 1, lambda = seq(0.1, 1, 0.1))

# Fit a glmnet classifier on the standardised training data and keep the
# lambda with the best cross-validated accuracy. The response is converted to
# a factor so that caret treats the task as classification.
lasso_fit <- train(
  x = trainSet_stdz[, -c("indicator")],
  y = as.factor(trainSet_stdz$indicator),
  method = "glmnet",
  metric = "Accuracy",
  trControl = ctrl,
  tuneGrid = tuneGrid
)


# Prediction ----
# Predicted class for every validation observation.
pred <- predict(lasso_fit, validSet_stdz)
# Alternative:
# predictions_lasso <- lasso_fit %>% predict(validSet)
# NOTE(review): the alternative passes the unstandardised `validSet`, unlike
# the call that is actually used.


# Coefficient inspection ----
# Coefficients of the final glmnet fit at the selected lambda. coef() returns
# a sparse one-column matrix, so it is made dense before being turned into a
# data frame (row names = coefficient names; the column is read as `s1` below).
bestTuneModelCoefs <- as.data.frame(
  as.matrix(coef(lasso_fit$finalModel, s = lasso_fit$bestTune$lambda))
)

# Numeric version of the coefficient names.
# NOTE(review): presumably the predictor names are m/z values - confirm.
# The intercept has no numeric name, so it is set to NA explicitly; converting
# it with as.numeric() would emit an "NAs introduced by coercion" warning on
# every run and mask warnings about genuinely malformed names.
coef_names <- rownames(bestTuneModelCoefs)
is_intercept <- coef_names == "(Intercept)"
bestTuneModelCoefs$mz_vals <- NA_real_
bestTuneModelCoefs$mz_vals[!is_intercept] <-
  as.numeric(coef_names[!is_intercept])

# Ten largest coefficients, most positive first. The ranking uses the signed
# value, so strongly negative coefficients are not shown.
bestTuneModelCoefs[head(order(bestTuneModelCoefs$s1, decreasing = TRUE), 10), ]