Commit d028ce4e authored by Lovepreet Kapila's avatar Lovepreet Kapila
Browse files

Upload New File

parent 6cf83b3a
Loading
Loading
Loading
Loading
+114 −0
Original line number Diff line number Diff line
# The workspace is deliberately left untouched. `rm(list = ls())` would delete
# every object of whoever runs or sources this script, yet it does not produce
# a clean session (attached packages and options survive). Every object used
# below is assigned before it is read, so the script does not rely on a cleared
# workspace; restart R if a truly fresh session is needed.

#TASK 2
# load necessary packages
library(httr)
library(jsonlite)
library(lubridate)

#Step 1
# Request the top-100 games from SteamSpy. GET() sends the request to the API
# endpoint; the body of the response is JSON.
top100.request <- GET(url = "https://steamspy.com/api.php?request=top100forever")

# Fail early with an informative error on an HTTP failure (4xx/5xx) instead of
# trying to parse an error page as JSON further down.
stop_for_status(top100.request)

#Step 2
# Read the response body as a string. content(..., as = "text") extracts the
# body as readable text; UTF-8 ensures special characters are handled
# correctly.
top100.raw <- content(top100.request, as = "text", encoding = "UTF-8")

#Step 3
# Transform the JSON into an R object.
top100.list <- fromJSON(top100.raw)

#Step 4: Convert list into a data frame
# The list is nested, so it cannot be turned into a data frame directly.
# Each game entry is converted into a small data frame first:
#  * NULL fields (JSON null) are replaced by NA, otherwise as.data.frame()
#    would drop that column and rbind() would fail on mismatched columns.
#  * stringsAsFactors = FALSE keeps text as character on R < 4.0, where the
#    old default created factors and as.numeric() in Step 6 would then return
#    factor level codes instead of the actual values.
top100.list1 <- lapply(top100.list, function(game) {
  game[vapply(game, is.null, logical(1))] <- NA
  as.data.frame(game, stringsAsFactors = FALSE)
})

# do.call("rbind", ...) stacks the per-game data frames row by row.
top100 <- do.call("rbind", top100.list1)

# Nothing below makes sense without at least one game (Step 7 reads row 1).
if (!is.data.frame(top100) || nrow(top100) == 0L) {
  stop("SteamSpy returned no games.", call. = FALSE)
}

#Step 5 Inspect the data
head(top100) # first 6 rows
summary(top100)

#Step 6 Transform price-related variables
# Prices are in cents and stored as text: convert them to numeric and scale to
# dollars. Dividing by 100 (rather than multiplying by 0.01) gives the
# correctly rounded value, e.g. exactly the same double as the literal 19.99.
top100$price <- as.numeric(top100$price) / 100
top100$initialprice <- as.numeric(top100$initialprice) / 100
top100$discount <- as.numeric(top100$discount)

#Step 7
# Keep the id and name of the first game in the table; the id is used by the
# later tasks to query game details and reviews.
gameid <- top100$appid[1]
game_name <- top100$name[1]

  
#TASK 3
# API endpoint for game details. HTTPS is used so the request is not sent in
# plain text, and the base URL carries no trailing "?" because GET() appends
# the query string itself.
details.url <- "https://store.steampowered.com/api/appdetails/"

# The API expects the game id as the `appids` query parameter. Wrapping it in
# a named list makes it usable as the `query` argument of GET().
query.list <- list(appids = gameid)

# Perform the GET request and stop on an HTTP error status.
details.request <- GET(url = details.url, query = query.list)
stop_for_status(details.request)

# Convert the raw response body into readable text.
details.raw <- content(details.request, as = "text", encoding = "UTF-8")

# Transform the JSON into an R list.
details.list <- fromJSON(details.raw)

# The first (and only requested) element of the response holds a `success`
# flag and, on success, the `data` element with the useful information.
# Checking the flag avoids silently working with NULL details.
details.entry <- details.list[[1]]
if (!isTRUE(details.entry$success)) {
  stop("Steam returned no details for app id ", gameid, ".", call. = FALSE)
}
details <- details.entry$data

# Inspect a few fields of interest.
details$release_date$date
details$genres
details$required_age
details$short_description

#TASK 4 (Collect user reviews)

#STEP 1
# rev.url1 - base URL for reviews
# rev.url2 - asks for JSON output, recent reviews, 100 reviews per page, and
#            ends with the cursor parameter used for pagination
# cursor   - "*" means start at the beginning
rev.url1 <- "https://store.steampowered.com/appreviews/"
rev.url2 <- "?json=1&filter=recent&num_per_page=100&cursor="
cursor <- "*"

#Step 2 (Perform GET request)
# paste0() concatenates the pieces without spaces, i.e. builds the full URL.
# The cursor is percent-encoded first: it is pasted straight into the query
# string, so any reserved character in it would otherwise corrupt the URL.
rev.request <- GET(url = paste0(
  rev.url1, gameid, rev.url2,
  utils::URLencode(cursor, reserved = TRUE)
))

# Stop on an HTTP error status instead of parsing an error page.
stop_for_status(rev.request)

#Step 3 (Convert binary content to text)
rev.raw <- content(rev.request, as = "text", encoding = "UTF-8")

#Step 4 (JSON to R list)
rev.list <- fromJSON(rev.raw)

# The payload reports success as 1; anything else means the query failed.
if (!isTRUE(rev.list$success == 1)) {
  stop("Steam review query failed for app id ", gameid, ".", call. = FALSE)
}

#Step 5 (Extract reviews into a data frame)
reviews <- rev.list$reviews

# The column operations below need at least one review.
if (!is.data.frame(reviews) || nrow(reviews) == 0L) {
  stop("No reviews returned for app id ", gameid, ".", call. = FALSE)
}

#Step 6 (Inspect the data)
names(reviews) # column names
summary(reviews) # summary statistics - author data is nested

#Step 7 (Unnest the author data)
# Expand the nested structure into proper columns (author.*).
# stringsAsFactors = FALSE keeps text columns as character on R < 4.0.
reviews <- do.call("data.frame", c(reviews, list(stringsAsFactors = FALSE)))
names(reviews)

#Step 8 (Fix data types)
# Convert UNIX timestamp to readable date-time
reviews$timestamp_created <- as_datetime(reviews$timestamp_created)
reviews$timestamp_created

# Ensure the up-vote count and the author's number of owned games are numeric
reviews$votes_up <- as.numeric(reviews$votes_up)
reviews$votes_up
reviews$author.num_games_owned <- as.numeric(reviews$author.num_games_owned)
reviews$author.num_games_owned

reviews

# Drop exact duplicate rows.
reviews <- unique(reviews)
reviews
 No newline at end of file