Commit ef7ee0b6 authored by Marius Puke's avatar Marius Puke
Browse files

add naive example

parent 7daf25c1
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
01_data_raw/

amex_outcomes.rds
amex_outcomes.csv
.Rproj.user
+10001 −0

File added.

Preview size limit exceeded, changes collapsed.

+5 −0
Original line number Diff line number Diff line
@@ -50,3 +50,8 @@ submission <- validation %>% select(ID) %>% mutate(PD = NA)
# Persist the submission template in both RDS and CSV form.
saveRDS(submission, file = "amex_submission.rds")
write.csv(submission, file = "amex_submission.csv")

# True labels for rows 20001-30000 of the raw sample, kept as ID/target pairs.
# Base `[` indexing is used on purpose so the original row names are retained.
outcomes <- select(datraw_small[20001:30000, ], ID, target)
outcomes
saveRDS(outcomes, file = "amex_outcomes.rds")
write.csv(outcomes, file = "amex_outcomes.csv")
+46 −19
Original line number Diff line number Diff line
# Start from an empty workspace and make all relative paths resolve against
# the folder of this script (the path is taken from the active RStudio document).
rm(list=ls())
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
library(tidyverse)
# Remote location of the training data
url_train <- "https://aidaho-edu.uni-hohenheim.de/gitlab/mariuspuke/sl2024_classificationcompetition/-/raw/main/amex_train.csv?ref_type=heads"
# Read the training CSV from the URL assigned on the line above.
# (url_validation is not defined at this point in the script, so it cannot be
# used here.)
train <- read.csv(url_train)


# Scratch section: request the submission file over HTTP with httr and try to
# deserialize the raw response body with readRDS().
# NOTE(review): `url_submission` is only assigned further down in this script,
# so in a fresh session this GET() call has no URL to use — confirm the
# intended statement order.
# NOTE(review): the `url_submission` assigned later points at a .csv file;
# readRDS() on CSV bytes would presumably fail — verify.
library(httr)
response <- GET(url_submission)
# Print the `url` field of the response object.
response$url


# Attempt 1: raw body, passing an explicit encoding argument.
readRDS(rawConnection(content(response, "raw", encoding = "ISO-8859-1")))

# Attempt 2: raw body without the encoding argument.
readRDS(rawConnection(content(response, "raw")))

# Attempt 3: same call on `submission` instead of `response`.
# NOTE(review): `submission` is assigned later via read.csv(), i.e. presumably
# a data frame rather than an httr response — this looks like a leftover.
readRDS(rawConnection(content(submission, "raw")))

# Local file name that the download.file() call at the end of the script
# writes to.
destination <- "amex_submission_dl.rds"
# Load the three competition files straight from the course GitLab.
# Each one is read as CSV, wrapped in a tibble, and stripped of column `X`
# (presumably the row-name column that write.csv() adds — TODO confirm).

# Training set
url_train <- "https://aidaho-edu.uni-hohenheim.de/gitlab/mariuspuke/sl2024_classificationcompetition/-/raw/main/amex_train.csv?ref_type=heads"
train <- url_train %>%
  read.csv() %>%
  tibble() %>%
  select(-X)
train

# Validation set
url_validation <- "https://aidaho-edu.uni-hohenheim.de/gitlab/mariuspuke/sl2024_classificationcompetition/-/raw/main/amex_validation.csv?ref_type=heads"
validation <- url_validation %>%
  read.csv() %>%
  tibble() %>%
  select(-X)
validation

# Submission template
url_submission <- "https://aidaho-edu.uni-hohenheim.de/gitlab/mariuspuke/sl2024_classificationcompetition/-/raw/main/amex_submission.csv?ref_type=heads"
submission <- url_submission %>%
  read.csv() %>%
  tibble() %>%
  select(-X)
submission


# Fit a naive linear regression of `target` on a fixed set of predictors.
# NOTE(review): the vector is named as categorical variables, but nothing here
# converts them to factors, so lm() will treat numeric columns as continuous —
# confirm this is intended.
catvars <- c('B_30', 'B_38', 'D_114', 'D_116', 'D_117', 'D_120', 'D_126', 'D_63', 'D_64',  'D_68')
# all_of() marks `catvars` as an external character vector of column names;
# passing it bare to select() is deprecated and ambiguous if the data ever
# contains a column literally called "catvars".
lmod <- lm(target ~ ., data = train %>% select(target, all_of(catvars)))
lmod

# Score the validation set: one predicted probability of default per ID.
# NOTE(review): predictions come from a plain lm(), so they are not constrained
# to [0, 1].
pd <-
validation %>%
  mutate(
    # fitted value of the naive model, used as the probability of default
    pd = predict(lmod, newdata = validation)
  ) %>%
  select(ID, pd)

# Fill the submission template with the predictions.
# The join key is stated explicitly so the result does not depend on which
# column names the two tables happen to share.
final <-
left_join(
  submission, pd,
  by = "ID"
  ) %>%
  mutate(PD = pd) %>%   # copy the lowercase prediction column into the PD slot
  select(ID, PD)
write.csv(final, "amex_submission_naive_forecasters.csv")


# Attach the predicted PD to the true outcomes, matching rows on ID.
# The key is given explicitly instead of relying on a natural join over all
# shared column names.
outcomes <- readRDS("amex_outcomes.rds") %>%
  left_join(final, by = "ID")

# Evaluate the ranking quality of the predictions with a ROC curve.
library(pROC)
# First argument is the observed response, second the predictor score.
roc_curve <- roc(as.numeric(outcomes$target), outcomes$PD)
roc_curve
plot(roc_curve, main="ROC Curve", col="#1c61b6")
# Download the file to the local path stored in `destination`, in binary mode.
# NOTE(review): `url` is never assigned in the visible script, so this would
# pick up base R's url() function rather than a URL string — presumably one of
# the url_* variables was intended; confirm which.
download.file(url, destination, mode = "wb")
 No newline at end of file