Commit 6cf83b3a authored by Lovepreet Kapila's avatar Lovepreet Kapila
Browse files

Upload New File

parent 2eac2246
Loading
Loading
Loading
Loading
+99 −0
Original line number Diff line number Diff line
#*******************************************************************************
#* Loop to collect more game reviews
#*
#*******************************************************************************
#*

# clear workspace
rm(list = ls())

# load necessary packages
library(httr)
library(jsonlite)
library(lubridate)

# Steam app ID to collect reviews for (570 is Dota 2); change as needed.
gameid <- 570


# Upper bound on the number of paged requests to send.
n <- 100

# Accumulator for the collected reviews; starts out empty.
game.rev <- data.frame()


# Base path of the review endpoint; the app ID is appended to it.
rev.url1 <- "https://store.steampowered.com/appreviews/"

# Query string for the request; the pagination cursor is appended to it.
rev.url2 <- "?json=1&filter=recent&num_per_page=100&cursor="


# Page through the review endpoint, appending each batch to game.rev.
#
# Reads:  gameid, n, rev.url1, rev.url2 and the accumulator game.rev, all
#         defined earlier in the script.
# Writes: game.rev (one row per review) and cursor (the pagination token used
#         for the next request).
# Stops early when a request returns an HTTP error or a page has no reviews.

# the very first request uses cursor "*"; later requests reuse the cursor
# returned by the previous response
cursor <- "*"

for (i in seq_len(n)) {

  # run the API query for batch i
  rev.request <- GET(url = paste0(rev.url1, gameid, rev.url2, cursor))

  # an HTTP error body is not a review payload: report it and keep whatever
  # has been collected so far instead of failing inside the JSON parser
  if (http_error(rev.request)) {
    warning("Query ", i, " of ", n, " for App ID #", gameid,
            " failed with HTTP status ", status_code(rev.request),
            ". Stopping early.", call. = FALSE)
    break
  }

  # decode the response body as UTF-8 text and parse the JSON once
  reviews.raw <- content(rev.request, as = "text", encoding = "UTF-8")
  reviews.list <- fromJSON(reviews.raw)

  # flatten nested data-frame columns (e.g. author) into author.* columns
  reviews <- do.call("data.frame", reviews.list$reviews)

  # a page without a 'recommendationid' column holds no reviews, so there is
  # nothing left to fetch: report and leave the loop
  if (!("recommendationid" %in% names(reviews))) {
    print(paste0("No further recent reviews are available for App ID #",
                 as.character(gameid), ". Query ", as.character(i), " of ",
                 as.character(n), " aborted. No further queries possible."))
    break
  }

  reviews$recommendationid <- as.character(reviews$recommendationid)
  reviews$weighted_vote_score <- as.numeric(reviews$weighted_vote_score)

  # convert Unix timestamps to date-time format
  reviews$timestamp_created <- as_datetime(reviews$timestamp_created)
  reviews$timestamp_updated <- as_datetime(reviews$timestamp_updated)
  reviews$author.last_played <- as_datetime(reviews$author.last_played)

  # the response carries the cursor for the next page; some cursors contain
  # a "+" that is not URL encoded and must be sent as "%2B"
  cursor <- gsub("+", "%2B", reviews.list$cursor, fixed = TRUE)

  # batches can differ in their columns; keep only the shared ones so that
  # rbind() succeeds. Names are compared (not just counts) so that batches
  # with equally many but different columns are aligned too, and
  # drop = FALSE keeps a single shared column as a data frame.
  if (i > 1 && !setequal(names(game.rev), names(reviews))) {
    vars <- intersect(names(game.rev), names(reviews))
    game.rev <- game.rev[, vars, drop = FALSE]
    reviews <- reviews[, vars, drop = FALSE]
  }

  game.rev <- rbind(game.rev, reviews)

  # print three progress dots, pausing 0.5 s after each (1.5 s per request)
  for (tick in seq_len(3L)) {
    message(".", appendLF = FALSE)
    Sys.sleep(time = 0.5)
  }

}