# Loading 02_code/R/230628_JB_InclassAS7_p2.R +11 −0 Original line number Diff line number Diff line Loading @@ -34,3 +34,14 @@
# NOTE(review): the call below is diff context (hunk label plus trailing
# context lines); arguments outside the hunk may be missing from this view.
# It is left untouched.
knnFit <- train ( as.factor(indicator ) ~ . , trControl = ctrl , preProcess = c("center" ,"scale") , tuneLength = 10)

# Model evaluation ----
# Drop the `file` and `indicator` columns so only predictor columns remain.
XX_valid <- subset(validSet, select = -c(file, indicator))

# Transform the validation predictors with `pcs`, which is defined outside
# this hunk (presumably the transformation fitted on the training data —
# TODO confirm).
pcs_new <- predict(pcs, newdata = XX_valid)
pred_age <- predict(knnFit, newdata = pcs_new)

# as.numeric() on a factor returns its integer level codes (1, 2, ...), so
# subtracting 1 yields 0/1. Assumes `pred_age` is a two-level factor whose
# levels are ordered "0", "1" — TODO confirm.
# Column 1: predicted class, column 2: observed indicator.
prd_outcome_comparison <- cbind(as.numeric(pred_age) - 1, validSet$indicator)

# 02_code/R/230705_JB_InclassAS7_p3.R 0 → 100644 +43 −0 Original line number Diff line number Diff line
# Fit a 10-fold cross-validated lasso (glmnet, alpha = 1) classifier for
# `indicator` on centered/scaled features, predict the validation set, and
# list the ten largest coefficients of the selected model.

# Clear workspace and graphs
# NOTE(review): rm(list = ls()) wipes whatever is in the calling workspace;
# kept to preserve the script's existing behavior, but consider removing it.
if (!is.null(dev.list())) dev.off()
rm(list = ls())

library("caret")
library("data.table")

load(file = "01_data/ML_data.Rdata")

# Learn centering/scaling parameters from the training predictors only.
# Negating a character vector of column names in `j` relies on data.table's
# `[` method — assumes trainSet/validSet are data.tables; TODO confirm.
train_preProcess <- preProcess(
  trainSet[, -c("indicator", "file")],
  method = c("center", "scale")
)

# Apply the TRAINING transformation to both sets. Standardizing the validation
# set with its own means/sds would put it on a different scale than the one
# the model is fitted on.
trainSet_stdz <- predict(train_preProcess, trainSet[, -c("file")])
validSet_stdz <- predict(train_preProcess, validSet[, -c("file")])

# `verboseIter` is the full argument name; `verbose` only worked through
# partial argument matching.
ctrl <- trainControl(method = "cv", number = 10, verboseIter = TRUE)

# alpha = 1 selects the lasso penalty; lambda is tuned over 0.1, 0.2, ..., 1.
tuneGrid <- expand.grid(alpha = 1, lambda = seq(0.1, 1, 0.1))

lasso_fit <- train(
  y = as.factor(trainSet_stdz$indicator),
  x = trainSet_stdz[, -c("indicator")],
  method = "glmnet",
  metric = "Accuracy",
  trControl = ctrl,
  tuneGrid = tuneGrid
)

pred <- predict(lasso_fit, validSet_stdz)
# Alternative:
# predictions_lasso <- lasso_fit %>% predict(validSet)

# Coefficients of the final model at the selected lambda. coef() returns a
# sparse matrix, so densify it before converting to a data frame.
bestTuneModelCoefs <- as.data.frame(
  as.matrix(coef(lasso_fit$finalModel, lasso_fit$bestTune$lambda))
)

# Row names are the predictor names plus "(Intercept)". Convert only the
# predictor names to numbers so the intercept becomes NA without a coercion
# warning (assumes predictor names are numeric m/z values — TODO confirm).
coef_names <- rownames(bestTuneModelCoefs)
is_intercept <- coef_names == "(Intercept)"
bestTuneModelCoefs$mz_vals <- NA_real_
bestTuneModelCoefs$mz_vals[!is_intercept] <-
  as.numeric(coef_names[!is_intercept])

# Ten largest coefficients. The coefficient column is addressed by position
# because its name ("s1" or "1") depends on the glmnet version.
bestTuneModelCoefs[
  head(order(bestTuneModelCoefs[[1L]], decreasing = TRUE), 10),
]
# Loading
# 02_code/R/230628_JB_InclassAS7_p2.R +11 −0 Original line number Diff line number Diff line Loading @@ -34,3 +34,14 @@
# NOTE(review): the call below is diff context (hunk label plus trailing
# context lines); arguments outside the hunk may be missing from this view.
# It is left untouched.
knnFit <- train ( as.factor(indicator ) ~ . , trControl = ctrl , preProcess = c("center" ,"scale") , tuneLength = 10)

# Model evaluation ----
# Drop the `file` and `indicator` columns so only predictor columns remain.
XX_valid <- subset(validSet, select = -c(file, indicator))

# Transform the validation predictors with `pcs`, which is defined outside
# this hunk (presumably the transformation fitted on the training data —
# TODO confirm).
pcs_new <- predict(pcs, newdata = XX_valid)
pred_age <- predict(knnFit, newdata = pcs_new)

# as.numeric() on a factor returns its integer level codes (1, 2, ...), so
# subtracting 1 yields 0/1. Assumes `pred_age` is a two-level factor whose
# levels are ordered "0", "1" — TODO confirm.
# Column 1: predicted class, column 2: observed indicator.
prd_outcome_comparison <- cbind(as.numeric(pred_age) - 1, validSet$indicator)
# 02_code/R/230705_JB_InclassAS7_p3.R 0 → 100644 +43 −0 Original line number Diff line number Diff line
# Fit a 10-fold cross-validated lasso (glmnet, alpha = 1) classifier for
# `indicator` on centered/scaled features, predict the validation set, and
# list the ten largest coefficients of the selected model.

# Clear workspace and graphs
# NOTE(review): rm(list = ls()) wipes whatever is in the calling workspace;
# kept to preserve the script's existing behavior, but consider removing it.
if (!is.null(dev.list())) dev.off()
rm(list = ls())

library("caret")
library("data.table")

load(file = "01_data/ML_data.Rdata")

# Learn centering/scaling parameters from the training predictors only.
# Negating a character vector of column names in `j` relies on data.table's
# `[` method — assumes trainSet/validSet are data.tables; TODO confirm.
train_preProcess <- preProcess(
  trainSet[, -c("indicator", "file")],
  method = c("center", "scale")
)

# Apply the TRAINING transformation to both sets. Standardizing the validation
# set with its own means/sds would put it on a different scale than the one
# the model is fitted on.
trainSet_stdz <- predict(train_preProcess, trainSet[, -c("file")])
validSet_stdz <- predict(train_preProcess, validSet[, -c("file")])

# `verboseIter` is the full argument name; `verbose` only worked through
# partial argument matching.
ctrl <- trainControl(method = "cv", number = 10, verboseIter = TRUE)

# alpha = 1 selects the lasso penalty; lambda is tuned over 0.1, 0.2, ..., 1.
tuneGrid <- expand.grid(alpha = 1, lambda = seq(0.1, 1, 0.1))

lasso_fit <- train(
  y = as.factor(trainSet_stdz$indicator),
  x = trainSet_stdz[, -c("indicator")],
  method = "glmnet",
  metric = "Accuracy",
  trControl = ctrl,
  tuneGrid = tuneGrid
)

pred <- predict(lasso_fit, validSet_stdz)
# Alternative:
# predictions_lasso <- lasso_fit %>% predict(validSet)

# Coefficients of the final model at the selected lambda. coef() returns a
# sparse matrix, so densify it before converting to a data frame.
bestTuneModelCoefs <- as.data.frame(
  as.matrix(coef(lasso_fit$finalModel, lasso_fit$bestTune$lambda))
)

# Row names are the predictor names plus "(Intercept)". Convert only the
# predictor names to numbers so the intercept becomes NA without a coercion
# warning (assumes predictor names are numeric m/z values — TODO confirm).
coef_names <- rownames(bestTuneModelCoefs)
is_intercept <- coef_names == "(Intercept)"
bestTuneModelCoefs$mz_vals <- NA_real_
bestTuneModelCoefs$mz_vals[!is_intercept] <-
  as.numeric(coef_names[!is_intercept])

# Ten largest coefficients. The coefficient column is addressed by position
# because its name ("s1" or "1") depends on the glmnet version.
bestTuneModelCoefs[
  head(order(bestTuneModelCoefs[[1L]], decreasing = TRUE), 10),
]