Loading .gitignore +2 −1 Original line number Diff line number Diff line 01_data_raw/ amex_outcomes.rds amex_outcomes.csv .Rproj.user amex_submission_naive_forecasters.csv 0 → 100644 +10001 −0 File added.Preview size limit exceeded, changes collapsed. Show changes data_prep.R +5 −0 Original line number Diff line number Diff line Loading @@ -50,3 +50,8 @@ submission <- validation %>% select(ID) %>% mutate(PD = NA) saveRDS(submission,"amex_submission.rds") write.csv(submission, "amex_submission.csv") # correct classes outcomes <- datraw_small[20001:30000,] %>% select(ID, target) outcomes saveRDS(outcomes,"amex_outcomes.rds") write.csv(outcomes, "amex_outcomes.csv") naive_example.R +46 −19 Original line number Diff line number Diff line rm(list=ls()) setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) library(tidyverse) # Local path to save the file url_train <- "https://aidaho-edu.uni-hohenheim.de/gitlab/mariuspuke/sl2024_classificationcompetition/-/raw/main/amex_train.csv?ref_type=heads" train <- read.csv(url_validation) library(httr) response <- GET(url_submission) response$url readRDS(rawConnection(content(response, "raw", encoding = "ISO-8859-1"))) readRDS(rawConnection(content(response, "raw"))) readRDS(rawConnection(content(submission, "raw"))) # Local path to save the file destination <- "amex_submission_dl.rds" # train url_train <- "https://aidaho-edu.uni-hohenheim.de/gitlab/mariuspuke/sl2024_classificationcompetition/-/raw/main/amex_train.csv?ref_type=heads" train <- read.csv(url_train)%>% tibble() %>% select(-X) train url_validation <- "https://aidaho-edu.uni-hohenheim.de/gitlab/mariuspuke/sl2024_classificationcompetition/-/raw/main/amex_validation.csv?ref_type=heads" validation <- read.csv(url_validation) %>% tibble() %>% select(-X) validation url_submission <- "https://aidaho-edu.uni-hohenheim.de/gitlab/mariuspuke/sl2024_classificationcompetition/-/raw/main/amex_submission.csv?ref_type=heads" submission <- read.csv(url_submission) %>% tibble() %>% select(-X) submission # fit a naive lm catvars <- c('B_30', 'B_38', 'D_114', 'D_116', 'D_117', 'D_120', 'D_126', 'D_63', 'D_64', 'D_68') lmod <- lm(target ~ ., data = train %>% select(target, catvars)) lmod # probability of default in validation set pd <- validation %>% mutate( #probability of default pd = predict(lmod, newdata = validation) ) %>% select(ID,pd) final <- left_join( submission, pd ) %>% mutate(PD=pd) %>% select(ID,PD) write.csv(final, "amex_submission_naive_forecasters.csv") outcomes <- readRDS("amex_outcomes.rds") %>% left_join( .,final ) library(pROC) roc_curve <- roc(as.numeric(outcomes$target), outcomes$PD) roc_curve plot(roc_curve, main="ROC Curve", col="#1c61b6") # Download the file download.file(url, destination, mode = "wb") No newline at end of file Loading
.gitignore +2 −1 Original line number Diff line number Diff line 01_data_raw/ amex_outcomes.rds amex_outcomes.csv .Rproj.user
amex_submission_naive_forecasters.csv 0 → 100644 +10001 −0 File added.Preview size limit exceeded, changes collapsed. Show changes
data_prep.R +5 −0 Original line number Diff line number Diff line Loading @@ -50,3 +50,8 @@ submission <- validation %>% select(ID) %>% mutate(PD = NA) saveRDS(submission,"amex_submission.rds") write.csv(submission, "amex_submission.csv") # correct classes outcomes <- datraw_small[20001:30000,] %>% select(ID, target) outcomes saveRDS(outcomes,"amex_outcomes.rds") write.csv(outcomes, "amex_outcomes.csv")
naive_example.R +46 −19 Original line number Diff line number Diff line rm(list=ls()) setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) library(tidyverse) # Local path to save the file url_train <- "https://aidaho-edu.uni-hohenheim.de/gitlab/mariuspuke/sl2024_classificationcompetition/-/raw/main/amex_train.csv?ref_type=heads" train <- read.csv(url_validation) library(httr) response <- GET(url_submission) response$url readRDS(rawConnection(content(response, "raw", encoding = "ISO-8859-1"))) readRDS(rawConnection(content(response, "raw"))) readRDS(rawConnection(content(submission, "raw"))) # Local path to save the file destination <- "amex_submission_dl.rds" # train url_train <- "https://aidaho-edu.uni-hohenheim.de/gitlab/mariuspuke/sl2024_classificationcompetition/-/raw/main/amex_train.csv?ref_type=heads" train <- read.csv(url_train)%>% tibble() %>% select(-X) train url_validation <- "https://aidaho-edu.uni-hohenheim.de/gitlab/mariuspuke/sl2024_classificationcompetition/-/raw/main/amex_validation.csv?ref_type=heads" validation <- read.csv(url_validation) %>% tibble() %>% select(-X) validation url_submission <- "https://aidaho-edu.uni-hohenheim.de/gitlab/mariuspuke/sl2024_classificationcompetition/-/raw/main/amex_submission.csv?ref_type=heads" submission <- read.csv(url_submission) %>% tibble() %>% select(-X) submission # fit a naive lm catvars <- c('B_30', 'B_38', 'D_114', 'D_116', 'D_117', 'D_120', 'D_126', 'D_63', 'D_64', 'D_68') lmod <- lm(target ~ ., data = train %>% select(target, catvars)) lmod # probability of default in validation set pd <- validation %>% mutate( #probability of default pd = predict(lmod, newdata = validation) ) %>% select(ID,pd) final <- left_join( submission, pd ) %>% mutate(PD=pd) %>% select(ID,PD) write.csv(final, "amex_submission_naive_forecasters.csv") outcomes <- readRDS("amex_outcomes.rds") %>% left_join( .,final ) library(pROC) roc_curve <- roc(as.numeric(outcomes$target), outcomes$PD) roc_curve plot(roc_curve, main="ROC Curve", col="#1c61b6") # Download the file download.file(url, destination, mode = "wb") No newline at end of file