Commit 0df7a1c0 authored by alexung3r's avatar alexung3r
Browse files

Started task 2

parent 4ff8b4ee
Loading
Loading
Loading
Loading

.gitignore

0 → 100644
+4 −0
Original line number Diff line number Diff line
.Rproj.user
.Rhistory
.RData
.Ruserdata
+5605 −0

File added.

Preview size limit exceeded, changes collapsed.

02_code/R/api/2c.R

0 → 100644
+69 −0
Original line number Diff line number Diff line
# Function 2c: download every job posting matching a search term, location and
# radius from the Bundesagentur fuer Arbeit "jobsuche" REST API, paging through
# all results, and return them as one flattened data frame.

rm(list = ls())  # NOTE(review): clearing the workspace in a script is an anti-pattern; kept to preserve the original workflow

#2c
download_all_jobs <- function(searchterm, location, radius) {
  library(httr)
  library(dplyr)
  library(jsonlite)

  # --- Authentication -------------------------------------------------------
  # NOTE(review): client credentials are committed in plain text; move them to
  # environment variables (Sys.getenv()) before sharing this repository.
  headers <- c("Content-Type" = "application/x-www-form-urlencoded")
  auth_data <- list(
    client_id = "c003a37f-024f-462a-b36d-b001be4cd24a",
    client_secret = "32a39620-32b3-4307-9aa1-511e3d7f48a8",
    grant_type = "client_credentials"
  )
  # Fetch the OAuth access token used for all subsequent requests.
  res <- POST(
    url = "https://rest.arbeitsagentur.de/oauth/gettoken_cc",
    add_headers(.headers = headers),
    body = auth_data,
    encode = "form"
  )
  token <- content(res)$access_token

  # --- Request setup --------------------------------------------------------
  page_size <- 200  # page size requested from the API (original hard-coded 200)
  base_url <- "https://rest.arbeitsagentur.de/jobboerse/jobsuche-service/pc/v4/jobs"
  params <- list(
    was = searchterm,
    wo = location,
    size = page_size,
    umkreis = radius
  )

  # Initial call only to learn the total number of matching offers.
  initial_req <- GET(
    url = base_url,
    add_headers(OAuthAccessToken = token, Accept = "application/json"),
    query = params
  )
  initial_data <- jsonlite::fromJSON(rawToChar(initial_req$content))
  total_offers <- initial_data$maxErgebnisse
  total_pages <- ceiling(total_offers / page_size)

  # Fetch every page. seq_len() (instead of 1:total_pages) yields an empty
  # loop when the search returns zero offers; 1:0 would iterate twice and
  # error. The result list is preallocated instead of grown.
  all_pages_data <- vector("list", total_pages)
  for (page in seq_len(total_pages)) {
    params$page <- page
    response <- GET(
      url = base_url,
      add_headers(OAuthAccessToken = token, Accept = "application/json"),
      query = params
    )
    page_data <- jsonlite::fromJSON(rawToChar(response$content))
    all_pages_data[[page]] <- as.data.frame(page_data$stellenangebote)
  }

  # Combine all pages into one data frame.
  job_postings <- bind_rows(all_pages_data)

  # Recursively flatten nested data.frame columns produced by fromJSON().
  unnest_dataframes <- function(x) {
    y <- do.call(data.frame, x)
    # inherits() per column instead of comparing class() strings.
    if (any(vapply(y, function(col) inherits(col, "data.frame"), logical(1)))) {
      return(unnest_dataframes(y))
    }
    y
  }

  unnest_dataframes(job_postings)
}

result_df <- download_all_jobs("Daten", "Stuttgart", 100)

02_code/R/api/2d.R

0 → 100644
+53 −0
Original line number Diff line number Diff line
#try to create 2d function


source("2c.R")
#2d
# Download all matching jobs and write them to a dated CSV in the current
# working directory.
#
# @param searchterm Search keyword forwarded to download_all_jobs().
# @param location   Location to search around.
# @param radius     Search radius in km.
# @return A status string naming the file that was written.
save_jobs_to_csv <- function(searchterm, location, radius) {
  # Call the download_all_jobs function to fetch data.
  jobs <- download_all_jobs(searchterm, location, radius)

  # Build "<YYYY-MM-DD>_jobs_<term>_full.csv". Deriving the term from the
  # argument fixes the hard-coded "daten", so other search terms produce a
  # correctly named file (output is unchanged for searchterm = "Daten").
  today <- Sys.Date()
  filename <- paste0(today, "_jobs_", tolower(searchterm), "_full.csv")

  # Save the data frame to a CSV file.
  write.csv(jobs, filename, row.names = FALSE)

  paste("File saved as:", filename)
}

# Example function call
save_jobs_to_csv("Daten", "Stuttgart", 100)




#this will be the function with the right working directory
# Correct version: writes the CSV into the project's raw-data directory.
source("2c.R")

# Download all matching jobs and write them to a dated CSV.
#
# @param searchterm Search keyword forwarded to download_all_jobs().
# @param location   Location to search around.
# @param radius     Search radius in km.
# @param directory  Output directory; defaults to the original hard-coded raw
#                   data path so existing calls behave identically. Passing a
#                   different path makes the function usable on other machines.
# @return A status string naming the file that was written.
save_jobs_to_csv <- function(searchterm, location, radius,
                             directory = "/Users/alexanderunger/Desktop/AIDAHO_IDS_THAS/01_data/raw") {
  # Call the download_all_jobs function to fetch data.
  jobs <- download_all_jobs(searchterm, location, radius)

  # "<YYYY-MM-DD>_jobs_<term>_full.csv"; the term is derived from the argument
  # instead of the hard-coded "daten" (unchanged output for "Daten").
  today <- Sys.Date()
  filename <- file.path(directory,
                        paste0(today, "_jobs_", tolower(searchterm), "_full.csv"))

  # Save the data frame to a CSV file in the specified directory.
  write.csv(jobs, filename, row.names = FALSE)

  paste("File saved as:", filename)
}


save_jobs_to_csv("Daten", "Stuttgart", 100)

getwd()


02_code/R/api/all.R

0 → 100644
+173 −0
Original line number Diff line number Diff line
library(httr)
library(dplyr)
library(jsonlite)
##############
# Task 4 APIs: fetch all "Daten" job postings around Stuttgart from the
# Arbeitsagentur job search API and combine them into one data frame.




rm(list = ls())  # NOTE(review): anti-pattern in scripts; kept to preserve the original workflow

# Headers for the OAuth token request.
# NOTE(review): client credentials are committed in plain text; move them to
# environment variables (Sys.getenv()) before sharing this repository.
headers <- c("Content-Type" = "application/x-www-form-urlencoded")
auth_data <- list(
  client_id = "c003a37f-024f-462a-b36d-b001be4cd24a",
  client_secret = "32a39620-32b3-4307-9aa1-511e3d7f48a8",
  grant_type = "client_credentials"
)

# POST request to fetch the OAuth access token.
res <- POST(
  url = "https://rest.arbeitsagentur.de/oauth/gettoken_cc",
  add_headers(.headers = headers),
  body = auth_data,
  encode = "form"
)
token <- content(res)$access_token


# Job search endpoint ("jobsuche" service, v4); search parameters:
# was = keyword, wo = location, umkreis = radius in km, size = page size.
base_url <- "https://rest.arbeitsagentur.de/jobboerse/jobsuche-service/pc/v4/jobs"
params <- list(
  was = "Daten",
  wo = "Stuttgart",
  size = 200,
  umkreis = "100"
)

# Initial call only to learn the total number of results, so we know how many
# pages of 200 offers to request.
initial_req <- GET(url = base_url, add_headers(OAuthAccessToken = token, Accept = "application/json"), query = params)
initial_data <- jsonlite::fromJSON(rawToChar(initial_req$content))
total_offers <- initial_data$maxErgebnisse
total_pages <- ceiling(total_offers / 200)

# Preallocated list of per-page data frames; seq_len() (instead of
# 1:total_pages) correctly skips the loop when there are zero results.
all_pages_data <- vector("list", total_pages)
for (page in seq_len(total_pages)) {
  params$page <- page
  response <- GET(url = base_url, add_headers(OAuthAccessToken = token, Accept = "application/json"), query = params)
  page_data <- jsonlite::fromJSON(rawToChar(response$content))
  all_pages_data[[page]] <- as.data.frame(page_data$stellenangebote)
}

# Combine all pages into one data frame.
job_postings <- bind_rows(all_pages_data)

# Show structure of the combined data frame.
str(job_postings)





#testing out stuff




# 2c, self-contained version: the original failed unless a global
# unnest_dataframes() had been defined beforehand (the author's own comment
# said so). The helper is now defined inside the function, consistent with the
# 2c.R version, so the function works on its own.
download_all_jobs <- function(searchterm, location, radius) {
  library(httr)
  library(dplyr)
  library(jsonlite)

  # Authentication setup (constants for all requests).
  # NOTE(review): credentials are hard-coded; move to Sys.getenv().
  headers <- c("Content-Type" = "application/x-www-form-urlencoded")
  auth_data <- list(
    client_id = "c003a37f-024f-462a-b36d-b001be4cd24a",
    client_secret = "32a39620-32b3-4307-9aa1-511e3d7f48a8",
    grant_type = "client_credentials"
  )
  # Fetch access token.
  res <- POST(
    url = "https://rest.arbeitsagentur.de/oauth/gettoken_cc",
    add_headers(.headers = headers),
    body = auth_data,
    encode = "form"
  )
  token <- content(res)$access_token

  # API request setup.
  base_url <- "https://rest.arbeitsagentur.de/jobboerse/jobsuche-service/pc/v4/jobs"
  params <- list(
    was = searchterm,
    wo = location,
    size = 200,
    umkreis = radius
  )

  # Initial API call to get the total number of results.
  initial_req <- GET(url = base_url, add_headers(OAuthAccessToken = token, Accept = "application/json"), query = params)
  initial_data <- jsonlite::fromJSON(rawToChar(initial_req$content))
  total_offers <- initial_data$maxErgebnisse
  total_pages <- ceiling(total_offers / 200)

  # Fetch all pages; seq_len() handles the zero-result case (1:0 would loop),
  # and the list is preallocated instead of grown.
  all_pages_data <- vector("list", total_pages)
  for (page in seq_len(total_pages)) {
    params$page <- page
    response <- GET(url = base_url, add_headers(OAuthAccessToken = token, Accept = "application/json"), query = params)
    page_data <- jsonlite::fromJSON(rawToChar(response$content))
    all_pages_data[[page]] <- as.data.frame(page_data$stellenangebote)
  }

  # Combine all pages into one data frame.
  job_postings <- bind_rows(all_pages_data)

  # Recursively flatten nested data.frame columns produced by fromJSON().
  unnest_dataframes <- function(x) {
    y <- do.call(data.frame, x)
    if (any(vapply(y, function(col) inherits(col, "data.frame"), logical(1)))) {
      return(unnest_dataframes(y))
    }
    y
  }

  unnest_dataframes(job_postings)
}

# Example function call
result_df <- download_all_jobs("Daten", "Stuttgart", 100)


source("2c.R")
#2d
# Download all matching jobs and write them to a dated CSV in the current
# working directory.
#
# @param searchterm Search keyword forwarded to download_all_jobs().
# @param location   Location to search around.
# @param radius     Search radius in km.
# @return A status string naming the file that was written.
save_jobs_to_csv <- function(searchterm, location, radius) {
  # Call the download_all_jobs function to fetch data.
  jobs <- download_all_jobs(searchterm, location, radius)

  # "<YYYY-MM-DD>_jobs_<term>_full.csv"; deriving the term from the argument
  # fixes the hard-coded "daten" (identical output for searchterm = "Daten").
  today <- Sys.Date()
  filename <- paste0(today, "_jobs_", tolower(searchterm), "_full.csv")

  # Save the data frame to a CSV file.
  write.csv(jobs, filename, row.names = FALSE)

  paste("File saved as:", filename)
}

# Example function call
save_jobs_to_csv("Daten", "Stuttgart", 100)





# Download all matching jobs and write them to a dated CSV under "raw/"
# relative to the working directory.
#
# @param searchterm Search keyword forwarded to download_all_jobs().
# @param location   Location to search around.
# @param radius     Search radius in km.
# @return A status string naming the file that was written.
save_jobs_to_csv <- function(searchterm, location, radius) {
  # Call the download_all_jobs function to fetch data.
  jobs <- download_all_jobs(searchterm, location, radius)

  # "<YYYY-MM-DD>_jobs_<term>_full.csv" under raw/. Underscores replace the
  # original's embedded spaces for consistency with every other variant of
  # this function in the project, and the term comes from the argument
  # instead of the hard-coded "daten".
  today <- Sys.Date()
  filename <- file.path("raw",
                        paste0(today, "_jobs_", tolower(searchterm), "_full.csv"))

  # Save the data frame to a CSV file in the specified directory.
  write.csv(jobs, filename, row.names = FALSE)

  paste("File saved as:", filename)
}



Loading