Commit 0df7a1c0 authored by alexung3r's avatar alexung3r
Browse files

Started task 2

parent 4ff8b4ee
Loading
Loading
Loading
Loading

.gitignore

0 → 100644
+4 −0
Original line number Diff line number Diff line
.Rproj.user
.Rhistory
.RData
.Ruserdata
+5605 −0

File added.

Preview size limit exceeded, changes collapsed.

02_code/R/api/2c.R

0 → 100644
+69 −0
Original line number Diff line number Diff line
# Function 2c: download every job posting matching a search term, location and
# radius from the Bundesagentur fuer Arbeit "jobsuche" REST API, paging through
# all results, and return them as one flattened data frame.

rm(list = ls())  # NOTE(review): clearing the workspace in a script is an anti-pattern; kept to preserve the original workflow

#2c
download_all_jobs <- function(searchterm, location, radius) {
  library(httr)
  library(dplyr)
  library(jsonlite)

  # --- Authentication -------------------------------------------------------
  # NOTE(review): client credentials are committed in plain text; move them to
  # environment variables (Sys.getenv()) before sharing this repository.
  headers <- c("Content-Type" = "application/x-www-form-urlencoded")
  auth_data <- list(
    client_id = "c003a37f-024f-462a-b36d-b001be4cd24a",
    client_secret = "32a39620-32b3-4307-9aa1-511e3d7f48a8",
    grant_type = "client_credentials"
  )
  # Fetch the OAuth access token used for all subsequent requests.
  res <- POST(
    url = "https://rest.arbeitsagentur.de/oauth/gettoken_cc",
    add_headers(.headers = headers),
    body = auth_data,
    encode = "form"
  )
  token <- content(res)$access_token

  # --- Request setup --------------------------------------------------------
  page_size <- 200  # page size requested from the API (original hard-coded 200)
  base_url <- "https://rest.arbeitsagentur.de/jobboerse/jobsuche-service/pc/v4/jobs"
  params <- list(
    was = searchterm,
    wo = location,
    size = page_size,
    umkreis = radius
  )

  # Initial call only to learn the total number of matching offers.
  initial_req <- GET(
    url = base_url,
    add_headers(OAuthAccessToken = token, Accept = "application/json"),
    query = params
  )
  initial_data <- jsonlite::fromJSON(rawToChar(initial_req$content))
  total_offers <- initial_data$maxErgebnisse
  total_pages <- ceiling(total_offers / page_size)

  # Fetch every page. seq_len() (instead of 1:total_pages) yields an empty
  # loop when the search returns zero offers; 1:0 would iterate twice and
  # error. The result list is preallocated instead of grown.
  all_pages_data <- vector("list", total_pages)
  for (page in seq_len(total_pages)) {
    params$page <- page
    response <- GET(
      url = base_url,
      add_headers(OAuthAccessToken = token, Accept = "application/json"),
      query = params
    )
    page_data <- jsonlite::fromJSON(rawToChar(response$content))
    all_pages_data[[page]] <- as.data.frame(page_data$stellenangebote)
  }

  # Combine all pages into one data frame.
  job_postings <- bind_rows(all_pages_data)

  # Recursively flatten nested data.frame columns produced by fromJSON().
  unnest_dataframes <- function(x) {
    y <- do.call(data.frame, x)
    # inherits() per column instead of comparing class() strings.
    if (any(vapply(y, function(col) inherits(col, "data.frame"), logical(1)))) {
      return(unnest_dataframes(y))
    }
    y
  }

  unnest_dataframes(job_postings)
}

result_df <- download_all_jobs("Daten", "Stuttgart", 100)

02_code/R/api/2d.R

0 → 100644
+53 −0
Original line number Diff line number Diff line
#try to create 2d function


source("2c.R")
#2d
# Download all matching jobs and write them to a dated CSV in the current
# working directory.
#
# @param searchterm Search keyword forwarded to download_all_jobs().
# @param location   Location to search around.
# @param radius     Search radius in km.
# @return A status string naming the file that was written.
save_jobs_to_csv <- function(searchterm, location, radius) {
  # Call the download_all_jobs function to fetch data.
  jobs <- download_all_jobs(searchterm, location, radius)

  # Build "<YYYY-MM-DD>_jobs_<term>_full.csv". Deriving the term from the
  # argument fixes the hard-coded "daten", so other search terms produce a
  # correctly named file (output is unchanged for searchterm = "Daten").
  today <- Sys.Date()
  filename <- paste0(today, "_jobs_", tolower(searchterm), "_full.csv")

  # Save the data frame to a CSV file.
  write.csv(jobs, filename, row.names = FALSE)

  paste("File saved as:", filename)
}

# Example function call
save_jobs_to_csv("Daten", "Stuttgart", 100)




#this will be the function with the right working directory
# Correct version: writes the CSV into the project's raw-data directory.
source("2c.R")

# Download all matching jobs and write them to a dated CSV.
#
# @param searchterm Search keyword forwarded to download_all_jobs().
# @param location   Location to search around.
# @param radius     Search radius in km.
# @param directory  Output directory; defaults to the original hard-coded raw
#                   data path so existing calls behave identically. Passing a
#                   different path makes the function usable on other machines.
# @return A status string naming the file that was written.
save_jobs_to_csv <- function(searchterm, location, radius,
                             directory = "/Users/alexanderunger/Desktop/AIDAHO_IDS_THAS/01_data/raw") {
  # Call the download_all_jobs function to fetch data.
  jobs <- download_all_jobs(searchterm, location, radius)

  # "<YYYY-MM-DD>_jobs_<term>_full.csv"; the term is derived from the argument
  # instead of the hard-coded "daten" (unchanged output for "Daten").
  today <- Sys.Date()
  filename <- file.path(directory,
                        paste0(today, "_jobs_", tolower(searchterm), "_full.csv"))

  # Save the data frame to a CSV file in the specified directory.
  write.csv(jobs, filename, row.names = FALSE)

  paste("File saved as:", filename)
}


save_jobs_to_csv("Daten", "Stuttgart", 100)

getwd()


02_code/R/api/all.R

0 → 100644
+173 −0
Original line number Diff line number Diff line
library(httr)
library(dplyr)
library(jsonlite)
##############
# Task 4 APIs: fetch all "Daten" job postings around Stuttgart from the
# Arbeitsagentur job search API and combine them into one data frame.




rm(list = ls())  # NOTE(review): anti-pattern in scripts; kept to preserve the original workflow

# Headers for the OAuth token request.
# NOTE(review): client credentials are committed in plain text; move them to
# environment variables (Sys.getenv()) before sharing this repository.
headers <- c("Content-Type" = "application/x-www-form-urlencoded")
auth_data <- list(
  client_id = "c003a37f-024f-462a-b36d-b001be4cd24a",
  client_secret = "32a39620-32b3-4307-9aa1-511e3d7f48a8",
  grant_type = "client_credentials"
)

# POST request to fetch the OAuth access token.
res <- POST(
  url = "https://rest.arbeitsagentur.de/oauth/gettoken_cc",
  add_headers(.headers = headers),
  body = auth_data,
  encode = "form"
)
token <- content(res)$access_token


# Job search endpoint ("jobsuche" service, v4); search parameters:
# was = keyword, wo = location, umkreis = radius in km, size = page size.
base_url <- "https://rest.arbeitsagentur.de/jobboerse/jobsuche-service/pc/v4/jobs"
params <- list(
  was = "Daten",
  wo = "Stuttgart",
  size = 200,
  umkreis = "100"
)

# Initial call only to learn the total number of results, so we know how many
# pages of 200 offers to request.
initial_req <- GET(url = base_url, add_headers(OAuthAccessToken = token, Accept = "application/json"), query = params)
initial_data <- jsonlite::fromJSON(rawToChar(initial_req$content))
total_offers <- initial_data$maxErgebnisse
total_pages <- ceiling(total_offers / 200)

# Preallocated list of per-page data frames; seq_len() (instead of
# 1:total_pages) correctly skips the loop when there are zero results.
all_pages_data <- vector("list", total_pages)
for (page in seq_len(total_pages)) {
  params$page <- page
  response <- GET(url = base_url, add_headers(OAuthAccessToken = token, Accept = "application/json"), query = params)
  page_data <- jsonlite::fromJSON(rawToChar(response$content))
  all_pages_data[[page]] <- as.data.frame(page_data$stellenangebote)
}

# Combine all pages into one data frame.
job_postings <- bind_rows(all_pages_data)

# Show structure of the combined data frame.
str(job_postings)





#testing out stuff




# 2c, self-contained version: the original failed unless a global
# unnest_dataframes() had been defined beforehand (the author's own comment
# said so). The helper is now defined inside the function, consistent with the
# 2c.R version, so the function works on its own.
download_all_jobs <- function(searchterm, location, radius) {
  library(httr)
  library(dplyr)
  library(jsonlite)

  # Authentication setup (constants for all requests).
  # NOTE(review): credentials are hard-coded; move to Sys.getenv().
  headers <- c("Content-Type" = "application/x-www-form-urlencoded")
  auth_data <- list(
    client_id = "c003a37f-024f-462a-b36d-b001be4cd24a",
    client_secret = "32a39620-32b3-4307-9aa1-511e3d7f48a8",
    grant_type = "client_credentials"
  )
  # Fetch access token.
  res <- POST(
    url = "https://rest.arbeitsagentur.de/oauth/gettoken_cc",
    add_headers(.headers = headers),
    body = auth_data,
    encode = "form"
  )
  token <- content(res)$access_token

  # API request setup.
  base_url <- "https://rest.arbeitsagentur.de/jobboerse/jobsuche-service/pc/v4/jobs"
  params <- list(
    was = searchterm,
    wo = location,
    size = 200,
    umkreis = radius
  )

  # Initial API call to get the total number of results.
  initial_req <- GET(url = base_url, add_headers(OAuthAccessToken = token, Accept = "application/json"), query = params)
  initial_data <- jsonlite::fromJSON(rawToChar(initial_req$content))
  total_offers <- initial_data$maxErgebnisse
  total_pages <- ceiling(total_offers / 200)

  # Fetch all pages; seq_len() handles the zero-result case (1:0 would loop),
  # and the list is preallocated instead of grown.
  all_pages_data <- vector("list", total_pages)
  for (page in seq_len(total_pages)) {
    params$page <- page
    response <- GET(url = base_url, add_headers(OAuthAccessToken = token, Accept = "application/json"), query = params)
    page_data <- jsonlite::fromJSON(rawToChar(response$content))
    all_pages_data[[page]] <- as.data.frame(page_data$stellenangebote)
  }

  # Combine all pages into one data frame.
  job_postings <- bind_rows(all_pages_data)

  # Recursively flatten nested data.frame columns produced by fromJSON().
  unnest_dataframes <- function(x) {
    y <- do.call(data.frame, x)
    if (any(vapply(y, function(col) inherits(col, "data.frame"), logical(1)))) {
      return(unnest_dataframes(y))
    }
    y
  }

  unnest_dataframes(job_postings)
}

# Example function call
result_df <- download_all_jobs("Daten", "Stuttgart", 100)


source("2c.R")
#2d
# Download all matching jobs and write them to a dated CSV in the current
# working directory.
#
# @param searchterm Search keyword forwarded to download_all_jobs().
# @param location   Location to search around.
# @param radius     Search radius in km.
# @return A status string naming the file that was written.
save_jobs_to_csv <- function(searchterm, location, radius) {
  # Call the download_all_jobs function to fetch data.
  jobs <- download_all_jobs(searchterm, location, radius)

  # "<YYYY-MM-DD>_jobs_<term>_full.csv"; deriving the term from the argument
  # fixes the hard-coded "daten" (identical output for searchterm = "Daten").
  today <- Sys.Date()
  filename <- paste0(today, "_jobs_", tolower(searchterm), "_full.csv")

  # Save the data frame to a CSV file.
  write.csv(jobs, filename, row.names = FALSE)

  paste("File saved as:", filename)
}

# Example function call
save_jobs_to_csv("Daten", "Stuttgart", 100)





# Download all matching jobs and write them to a dated CSV under "raw/"
# relative to the working directory.
#
# @param searchterm Search keyword forwarded to download_all_jobs().
# @param location   Location to search around.
# @param radius     Search radius in km.
# @return A status string naming the file that was written.
save_jobs_to_csv <- function(searchterm, location, radius) {
  # Call the download_all_jobs function to fetch data.
  jobs <- download_all_jobs(searchterm, location, radius)

  # "<YYYY-MM-DD>_jobs_<term>_full.csv" under raw/. Underscores replace the
  # original's embedded spaces for consistency with every other variant of
  # this function in the project, and the term comes from the argument
  # instead of the hard-coded "daten".
  today <- Sys.Date()
  filename <- file.path("raw",
                        paste0(today, "_jobs_", tolower(searchterm), "_full.csv"))

  # Save the data frame to a CSV file in the specified directory.
  write.csv(jobs, filename, row.names = FALSE)

  paste("File saved as:", filename)
}



Loading