# 02/R/22.11.25_Collect_Steam_Data_Sources.R
#
# Collects Steam data from three public endpoints:
#   Task 2 - SteamSpy "top100forever" list     -> data frame `top100`
#   Task 3 - Steam store details for one game  -> list `details`
#   Task 4 - recent user reviews for that game -> data frame `reviews`
#
# NOTE: the script used to begin with rm(list = ls()). That call was removed
# because it wipes the caller's workspace while leaving loaded packages and
# options untouched. Run the script in a fresh R session instead.

# Load necessary packages ----
library(httr)
library(jsonlite)
library(lubridate)

# TASK 2: top 100 games ----

# Step 1: GET() sends a request to the API endpoint; the body is raw JSON.
top100.request <- GET(
  url = "https://steamspy.com/api.php",
  query = list(request = "top100forever")
)
# Fail here on an HTTP error instead of trying to parse an error page as JSON.
stop_for_status(top100.request)

# Step 2: content(..., as = "text") extracts the response body as a string.
# UTF-8 ensures special characters are handled correctly.
top100.raw <- content(top100.request, as = "text", encoding = "UTF-8")

# Step 3: transform the JSON string into an R object.
top100.list <- fromJSON(top100.raw)

# Step 4: convert the list into a data frame.
# The list is nested, so each game entry is first converted to a small data
# frame with lapply(); do.call("rbind", ...) then stacks them row by row.
top100.list1 <- lapply(top100.list, as.data.frame)
top100 <- do.call("rbind", top100.list1)

# Step 5: inspect the data.
head(top100) # first 6 rows
summary(top100)

# Step 6: transform price-related variables.
# Prices are in cents and stored as text: as.numeric() converts the text to
# numbers and dividing by 100 rescales cents to dollars.
top100$price <- as.numeric(top100$price) / 100
top100$initialprice <- as.numeric(top100$initialprice) / 100
top100$discount <- as.numeric(top100$discount)

# Step 7: keep the id and name of the first game for the following tasks.
gameid <- top100$appid[1]
game_name <- top100$name[1]

# TASK 3: game details ----

# API endpoint for game details (https, like the reviews endpoint below).
details.url <- "https://store.steampowered.com/api/appdetails/"

# The API expects the game id as a query parameter; wrapping it in list()
# makes it compatible with the `query` argument of GET().
query.list <- list(appids = gameid)

# Perform the GET request and fail early on an HTTP error.
details.request <- GET(url = details.url, query = query.list)
stop_for_status(details.request)

# Convert the raw response into readable text, then JSON into an R list.
details.raw <- content(details.request, as = "text", encoding = "UTF-8")
details.list <- fromJSON(details.raw)

# The useful information is inside the $data element of the first entry.
details <- details.list[[1]]$data
if (is.null(details)) {
  stop("No store details returned for appid ", gameid, call. = FALSE)
}

details$release_date$date
details$genres
details$required_age
details$short_description

# TASK 4: collect user reviews ----

# Step 1: base URL and query parameters.
# json = 1 requests JSON output, filter = "recent" selects recent reviews,
# num_per_page = 100 limits the page to 100 reviews, and cursor = "*" starts
# at the beginning. Passing them through `query` lets httr URL-encode the
# cursor instead of pasting it into the URL unescaped.
rev.url1 <- "https://store.steampowered.com/appreviews/"
cursor <- "*"
rev.query <- list(
  json = "1",
  filter = "recent",
  num_per_page = "100",
  cursor = cursor
)

# Step 2: perform the GET request; paste0() appends the game id to the URL.
rev.request <- GET(url = paste0(rev.url1, gameid), query = rev.query)
stop_for_status(rev.request)

# Step 3: convert the binary content to text.
rev.raw <- content(rev.request, as = "text", encoding = "UTF-8")

# Step 4: JSON to R list.
rev.list <- fromJSON(rev.raw)

# Step 5: extract the reviews into a data frame.
reviews <- rev.list$reviews
if (!is.data.frame(reviews) || nrow(reviews) == 0) {
  stop("No reviews returned for appid ", gameid, call. = FALSE)
}

# Step 6: inspect the data.
names(reviews) # column names
summary(reviews) # summary statistics; the author data is still nested

# Step 7: unnest the author data into regular `author.*` columns.
reviews <- jsonlite::flatten(reviews)
names(reviews)

# Step 8: fix data types.
# Convert the UNIX timestamp to a readable date-time.
reviews$timestamp_created <- as_datetime(reviews$timestamp_created)
reviews$timestamp_created

# Ensure the vote count and the number of games owned are numeric.
reviews$votes_up <- as.numeric(reviews$votes_up)
reviews$votes_up
reviews$author.num_games_owned <- as.numeric(reviews$author.num_games_owned)
reviews$author.num_games_owned
reviews

# Drop duplicated review rows.
reviews <- unique(reviews)
reviews
# 02/R/22.11.25_Collect_Steam_Data_Sources.R
#
# Collects Steam data from three public endpoints:
#   Task 2 - SteamSpy "top100forever" list     -> data frame `top100`
#   Task 3 - Steam store details for one game  -> list `details`
#   Task 4 - recent user reviews for that game -> data frame `reviews`
#
# NOTE: the script used to begin with rm(list = ls()). That call was removed
# because it wipes the caller's workspace while leaving loaded packages and
# options untouched. Run the script in a fresh R session instead.

# Load necessary packages ----
library(httr)
library(jsonlite)
library(lubridate)

# TASK 2: top 100 games ----

# Step 1: GET() sends a request to the API endpoint; the body is raw JSON.
top100.request <- GET(
  url = "https://steamspy.com/api.php",
  query = list(request = "top100forever")
)
# Fail here on an HTTP error instead of trying to parse an error page as JSON.
stop_for_status(top100.request)

# Step 2: content(..., as = "text") extracts the response body as a string.
# UTF-8 ensures special characters are handled correctly.
top100.raw <- content(top100.request, as = "text", encoding = "UTF-8")

# Step 3: transform the JSON string into an R object.
top100.list <- fromJSON(top100.raw)

# Step 4: convert the list into a data frame.
# The list is nested, so each game entry is first converted to a small data
# frame with lapply(); do.call("rbind", ...) then stacks them row by row.
top100.list1 <- lapply(top100.list, as.data.frame)
top100 <- do.call("rbind", top100.list1)

# Step 5: inspect the data.
head(top100) # first 6 rows
summary(top100)

# Step 6: transform price-related variables.
# Prices are in cents and stored as text: as.numeric() converts the text to
# numbers and dividing by 100 rescales cents to dollars.
top100$price <- as.numeric(top100$price) / 100
top100$initialprice <- as.numeric(top100$initialprice) / 100
top100$discount <- as.numeric(top100$discount)

# Step 7: keep the id and name of the first game for the following tasks.
gameid <- top100$appid[1]
game_name <- top100$name[1]

# TASK 3: game details ----

# API endpoint for game details (https, like the reviews endpoint below).
details.url <- "https://store.steampowered.com/api/appdetails/"

# The API expects the game id as a query parameter; wrapping it in list()
# makes it compatible with the `query` argument of GET().
query.list <- list(appids = gameid)

# Perform the GET request and fail early on an HTTP error.
details.request <- GET(url = details.url, query = query.list)
stop_for_status(details.request)

# Convert the raw response into readable text, then JSON into an R list.
details.raw <- content(details.request, as = "text", encoding = "UTF-8")
details.list <- fromJSON(details.raw)

# The useful information is inside the $data element of the first entry.
details <- details.list[[1]]$data
if (is.null(details)) {
  stop("No store details returned for appid ", gameid, call. = FALSE)
}

details$release_date$date
details$genres
details$required_age
details$short_description

# TASK 4: collect user reviews ----

# Step 1: base URL and query parameters.
# json = 1 requests JSON output, filter = "recent" selects recent reviews,
# num_per_page = 100 limits the page to 100 reviews, and cursor = "*" starts
# at the beginning. Passing them through `query` lets httr URL-encode the
# cursor instead of pasting it into the URL unescaped.
rev.url1 <- "https://store.steampowered.com/appreviews/"
cursor <- "*"
rev.query <- list(
  json = "1",
  filter = "recent",
  num_per_page = "100",
  cursor = cursor
)

# Step 2: perform the GET request; paste0() appends the game id to the URL.
rev.request <- GET(url = paste0(rev.url1, gameid), query = rev.query)
stop_for_status(rev.request)

# Step 3: convert the binary content to text.
rev.raw <- content(rev.request, as = "text", encoding = "UTF-8")

# Step 4: JSON to R list.
rev.list <- fromJSON(rev.raw)

# Step 5: extract the reviews into a data frame.
reviews <- rev.list$reviews
if (!is.data.frame(reviews) || nrow(reviews) == 0) {
  stop("No reviews returned for appid ", gameid, call. = FALSE)
}

# Step 6: inspect the data.
names(reviews) # column names
summary(reviews) # summary statistics; the author data is still nested

# Step 7: unnest the author data into regular `author.*` columns.
reviews <- jsonlite::flatten(reviews)
names(reviews)

# Step 8: fix data types.
# Convert the UNIX timestamp to a readable date-time.
reviews$timestamp_created <- as_datetime(reviews$timestamp_created)
reviews$timestamp_created

# Ensure the vote count and the number of games owned are numeric.
reviews$votes_up <- as.numeric(reviews$votes_up)
reviews$votes_up
reviews$author.num_games_owned <- as.numeric(reviews$author.num_games_owned)
reviews$author.num_games_owned
reviews

# Drop duplicated review rows.
reviews <- unique(reviews)
reviews