# Source: 02/R/231030_SK_Collect_Steam_Reviews__3_.R
#*******************************************************************************
#* Loop to collect more game reviews
#*
#* Pages through the Steam store "appreviews" endpoint for one app, issuing up
#* to `n` requests of 100 reviews each, and stacks the flattened batches into
#* the data frame `game.rev`.
#*******************************************************************************
#*

# clear workspace
# NOTE(review): this removes every object in the calling environment when the
# script is source()d. Kept for backward compatibility -- consider dropping it.
rm(list = ls())

# load necessary packages
library(httr)
library(jsonlite)
library(lubridate)

# gameid of Dota 2 - you can change it!
gameid <- 570

# Number of requests
n <- 100

# create an empty data frame (stays empty if no batch could be collected)
game.rev <- data.frame()

# establish the base path URL
rev.url1 <- "https://store.steampowered.com/appreviews/"

# establish the URL extension
rev.url2 <- "?json=1&filter=recent&num_per_page=100&cursor="

# The initial query uses cursor "*"; every later query uses the cursor that
# was returned with the previous response.
cursor <- "*"

# One slot per request. Batches are bound once after the loop instead of
# growing `game.rev` with rbind() on every iteration.
batches <- vector("list", n)

# Columns holding Unix timestamps that are converted to date-time values.
time.cols <- c("timestamp_created", "timestamp_updated", "author.last_played")

for (i in seq_len(n)) {
  # run the API query for batch i.
  rev.request <- GET(url = paste0(rev.url1, gameid, rev.url2, cursor))

  # Stop paging on an HTTP error instead of feeding a non-JSON error body to
  # fromJSON(); the batches collected so far are still bound below.
  if (http_error(rev.request)) {
    warning(
      "Query ", i, " of ", n, " failed with HTTP status ",
      status_code(rev.request), ". Stopping early.",
      call. = FALSE
    )
    break
  }

  # read the response body as a UTF-8 string and parse the JSON.
  reviews.raw <- content(rev.request, as = "text", encoding = "UTF-8")
  reviews.list <- fromJSON(reviews.raw)
  reviews <- reviews.list$reviews

  # When the pages run out the response no longer carries review records.
  # Check for 'recommendationid' before going any further; if it is absent,
  # report and leave the loop.
  has.reviews <- is.data.frame(reviews) &&
    "recommendationid" %in% names(reviews)
  if (!has.reviews) {
    print(paste0(
      "No further recent reviews are available for App ID #", gameid,
      ". Query ", i, " of ", n,
      " aborted. No further queries possible."
    ))
    break
  }

  # unnest data frame (nested data-frame columns become prefixed columns)
  reviews <- do.call("data.frame", reviews)

  reviews$recommendationid <- as.character(reviews$recommendationid)
  reviews$weighted_vote_score <- as.numeric(reviews$weighted_vote_score)

  # convert Unix timestamp to date.time format. Only columns present in this
  # batch are touched, so a missing column cannot abort the run.
  for (col in intersect(time.cols, names(reviews))) {
    reviews[[col]] <- as_datetime(reviews[[col]])
  }

  batches[[i]] <- reviews

  # to request the next batch, reuse the cursor delivered with this response
  # (taken from the already-parsed JSON rather than parsing the body again).
  next.cursor <- reviews.list$cursor
  if (!is.character(next.cursor) || length(next.cursor) != 1L) {
    message("No cursor returned for query ", i, " of ", n, ". Stopping early.")
    break
  }

  # some cursors return characters that are not URL encoded.
  # Must replace problem pagination "+" with correct character "%2B".
  cursor <- gsub("+", "%2B", next.cursor, fixed = TRUE)

  # Progress indicator: three dots, 0.5 s apart (1.5 s pause between requests).
  for (tick in seq_len(3L)) {
    message(".", appendLF = FALSE)
    Sys.sleep(time = 0.5)
  }
}

# Bind all collected batches. Batches may differ in their columns, so only
# the columns common to every batch are kept (in the first batch's order).
batches <- Filter(Negate(is.null), batches)
if (length(batches) > 0) {
  common.cols <- Reduce(intersect, lapply(batches, names))
  game.rev <- do.call(
    rbind,
    lapply(batches, function(batch) batch[, common.cols, drop = FALSE])
  )
}
# Source: 02/R/231030_SK_Collect_Steam_Reviews__3_.R
#*******************************************************************************
#* Loop to collect more game reviews
#*
#* Pages through the Steam store "appreviews" endpoint for one app, issuing up
#* to `n` requests of 100 reviews each, and stacks the flattened batches into
#* the data frame `game.rev`.
#*******************************************************************************
#*

# clear workspace
# NOTE(review): this removes every object in the calling environment when the
# script is source()d. Kept for backward compatibility -- consider dropping it.
rm(list = ls())

# load necessary packages
library(httr)
library(jsonlite)
library(lubridate)

# gameid of Dota 2 - you can change it!
gameid <- 570

# Number of requests
n <- 100

# create an empty data frame (stays empty if no batch could be collected)
game.rev <- data.frame()

# establish the base path URL
rev.url1 <- "https://store.steampowered.com/appreviews/"

# establish the URL extension
rev.url2 <- "?json=1&filter=recent&num_per_page=100&cursor="

# The initial query uses cursor "*"; every later query uses the cursor that
# was returned with the previous response.
cursor <- "*"

# One slot per request. Batches are bound once after the loop instead of
# growing `game.rev` with rbind() on every iteration.
batches <- vector("list", n)

# Columns holding Unix timestamps that are converted to date-time values.
time.cols <- c("timestamp_created", "timestamp_updated", "author.last_played")

for (i in seq_len(n)) {
  # run the API query for batch i.
  rev.request <- GET(url = paste0(rev.url1, gameid, rev.url2, cursor))

  # Stop paging on an HTTP error instead of feeding a non-JSON error body to
  # fromJSON(); the batches collected so far are still bound below.
  if (http_error(rev.request)) {
    warning(
      "Query ", i, " of ", n, " failed with HTTP status ",
      status_code(rev.request), ". Stopping early.",
      call. = FALSE
    )
    break
  }

  # read the response body as a UTF-8 string and parse the JSON.
  reviews.raw <- content(rev.request, as = "text", encoding = "UTF-8")
  reviews.list <- fromJSON(reviews.raw)
  reviews <- reviews.list$reviews

  # When the pages run out the response no longer carries review records.
  # Check for 'recommendationid' before going any further; if it is absent,
  # report and leave the loop.
  has.reviews <- is.data.frame(reviews) &&
    "recommendationid" %in% names(reviews)
  if (!has.reviews) {
    print(paste0(
      "No further recent reviews are available for App ID #", gameid,
      ". Query ", i, " of ", n,
      " aborted. No further queries possible."
    ))
    break
  }

  # unnest data frame (nested data-frame columns become prefixed columns)
  reviews <- do.call("data.frame", reviews)

  reviews$recommendationid <- as.character(reviews$recommendationid)
  reviews$weighted_vote_score <- as.numeric(reviews$weighted_vote_score)

  # convert Unix timestamp to date.time format. Only columns present in this
  # batch are touched, so a missing column cannot abort the run.
  for (col in intersect(time.cols, names(reviews))) {
    reviews[[col]] <- as_datetime(reviews[[col]])
  }

  batches[[i]] <- reviews

  # to request the next batch, reuse the cursor delivered with this response
  # (taken from the already-parsed JSON rather than parsing the body again).
  next.cursor <- reviews.list$cursor
  if (!is.character(next.cursor) || length(next.cursor) != 1L) {
    message("No cursor returned for query ", i, " of ", n, ". Stopping early.")
    break
  }

  # some cursors return characters that are not URL encoded.
  # Must replace problem pagination "+" with correct character "%2B".
  cursor <- gsub("+", "%2B", next.cursor, fixed = TRUE)

  # Progress indicator: three dots, 0.5 s apart (1.5 s pause between requests).
  for (tick in seq_len(3L)) {
    message(".", appendLF = FALSE)
    Sys.sleep(time = 0.5)
  }
}

# Bind all collected batches. Batches may differ in their columns, so only
# the columns common to every batch are kept (in the first batch's order).
batches <- Filter(Negate(is.null), batches)
if (length(batches) > 0) {
  common.cols <- Reduce(intersect, lapply(batches, names))
  game.rev <- do.call(
    rbind,
    lapply(batches, function(batch) batch[, common.cols, drop = FALSE])
  )
}