Commit ed95d015 authored by Augustine Mensah's avatar Augustine Mensah
Browse files

initial changes

parents
Loading
Loading
Loading
Loading

00_docs/Assignment.pdf

0 → 100644
+240 KiB

File added.

No diff preview for this file type.

+191 KiB

File added.

No diff preview for this file type.

+77.8 KiB

File added.

No diff preview for this file type.

+102 −0
Original line number Diff line number Diff line
#*******************************************************************************
#* Loop to collect more game reviews
#*
#*******************************************************************************
#*

# Clear workspace.
# NOTE(review): `rm(list = ls())` in scripts is discouraged (it wipes the
# caller's session) -- kept here only to preserve the script's original behavior.
rm(list = ls())

# load necessary packages
library(httr)      # HTTP requests (GET, content)
library(jsonlite)  # JSON parsing (fromJSON)
library(lubridate) # Unix timestamps -> date-times (as_datetime)

# Steam App ID to query -- Dota 2 by default; you can change it!
gameid <- 570

# Maximum number of paginated requests (each returns up to 100 reviews).
n <- 100

# Accumulator for all collected review batches.
game.rev <- data.frame()

# establish the base path URL
rev.url1 <- "https://store.steampowered.com/appreviews/"

# establish the URL extension; the pagination cursor is appended to it
rev.url2 <- "?json=1&filter=recent&num_per_page=100&cursor="

for (i in seq_len(n)) {

  # The first request uses the wildcard cursor "*"; every later iteration
  # reuses the cursor extracted from the previous response.
  if (i == 1) {
    cursor <- "*"
  }

  # run the API query for batch i.
  rev.request <- GET(url = paste0(rev.url1, gameid, rev.url2, cursor))

  # Parse the JSON payload once; it is reused below for both the reviews
  # and the pagination cursor (previously the body was downloaded from the
  # response object and parsed a second time just to read the cursor).
  reviews.raw  <- content(rev.request, as = "text", encoding = "UTF-8")
  reviews.list <- fromJSON(reviews.raw)
  reviews      <- reviews.list$reviews

  # unnest the nested author data frame into plain columns
  reviews <- do.call("data.frame", reviews)

  # When Steam runs out of recent reviews the payload no longer carries a
  # 'recommendationid' column; stop querying cleanly in that case instead
  # of aborting with an error.
  if ("recommendationid" %in% names(reviews)) {

    reviews$recommendationid    <- as.character(reviews$recommendationid)
    reviews$weighted_vote_score <- as.numeric(reviews$weighted_vote_score)

    # convert Unix timestamps to date-time (POSIXct) format.
    reviews$timestamp_created  <- as_datetime(reviews$timestamp_created)
    reviews$timestamp_updated  <- as_datetime(reviews$timestamp_updated)
    reviews$author.last_played <- as_datetime(reviews$author.last_played)

    # to request the next n reviews, extract the custom cursor that was
    # provided within the previous request (already parsed above)
    cursor <- reviews.list$cursor

    # some cursors return characters that are not URL encoded.
    # Must replace problem pagination "+" with correct encoding "%2B".
    cursor <- gsub("\\+", "%2B", cursor)

    # Batches can differ in their column sets; keep only the columns common
    # to the accumulated data and the new batch before row-binding.
    if (i > 1 && ncol(game.rev) != ncol(reviews)) {
      vars     <- intersect(names(reviews), names(game.rev))
      game.rev <- game.rev[, names(game.rev) %in% vars]
      reviews  <- reviews[, names(reviews) %in% vars]
    }

    game.rev <- rbind(game.rev, reviews)

  } else {
    # if recommendationid does not exist in the data frame, then break.
    print(paste0("No further recent reviews are available for App ID #",
                 as.character(gameid), ". Query ", as.character(i), " of ",
                 as.character(n), " aborted. No further queries possible."))
    break
  }

  # Progress dots plus short pauses to stay polite to the API.
  for (tick in 1:3) {
    message(".", appendLF = FALSE)
    Sys.sleep(time = 0.5)
  }

}

# Drop exact duplicate rows that can appear across overlapping batches.
game.rev <- game.rev[!duplicated(game.rev), ]

save(game.rev, file = "gamerev.RData")
+66 −0
Original line number Diff line number Diff line
#' Classify the sentiment and NRC emotions of a text via a DeepSeek model.
#'
#' Sends `text`, prefixed by a fixed instruction prompt, to the aidaho
#' Ollama queue endpoint, then extracts the first JSON object from the
#' model's response and parses it.
#'
#' @param text A character string to analyse.
#' @return The parsed JSON answer as a list; per the prompt schema it is
#'   expected to hold `sentiment` (number in [-1, 1]) and `emotions`
#'   (NRC emotion labels), though the exact content depends on the model.
sentiment_deepseek <- function(text){
  # Define the URL of the Ollama queue endpoint
  url <- "https://aidaho-edu.uni-hohenheim.de/ollama/queue"
  
  # here is the base prompt (kept verbatim -- it is part of the model contract)
  base_prompt <- 'You are an advanced AI assistant. You was created to perform sentiment analysis on input text.
                I need you to classify each text you receive and provide your analysis using the following JSON schema:
                {
                "sentiment": {
                "type": "number",
                "description": "A floating-point representation of the sentiment of the text, 
                rounded to two decimal places. Scale ranges from -1.0 (negative) to 1.0 (positive), 
                where 0.0 represents neutral sentiment.",
                "required": true
               },
            "emotions": {
            "type": "array",
            "description": "A list of one or multiple emotions of the NRC dictionary 
            that are most relevant to the text. 
            The possible values are: anger, anticipation, disgust, fear, joy, 
            sadness, surprise, trust. Assign only these emotions!",
            "required": true
            }
        }

          Always respond with a valid JSON object adhering to this schema and never add 
          more to the JSON format. 
          Do not include any other text or messages in your response!
          Exclude markdown. Ignore if the sentence is incomplete or
          contains spelling errors.

        INPUT TEXT is'
  # Append the input text to the instruction prompt
  prompt <- paste(base_prompt, text)
  
  # Construct the request body as a named list; a fixed seed keeps the
  # model output reproducible across calls.
  payload <- list(
    model = "deepseek-r1:32b",
    options = list(seed = 42),
    endpoint = "/api/generate",
    data = list(
      stream = FALSE,
      prompt = prompt
    )
  )
  
  # Make the POST request
  response <- httr::POST(
    url = url,
    add_headers("Content-Type" = "application/json"),
    body = payload,
    encode = "json"
  )
  
  # Fail fast on HTTP errors instead of parsing an error body as an answer.
  httr::stop_for_status(response)
  
  # NOTE: local variable renamed from `content` to avoid shadowing
  # `httr::content`.
  response_text <- httr::content(response, as = "text", encoding = "UTF-8")
  parsed        <- jsonlite::fromJSON(response_text)
  
  # Pull the first balanced JSON object out of the model's free-text reply
  # (the regex allows one level of nesting, enough for the schema above).
  json_str <- stringr::str_extract(parsed$response, "\\{(?:[^{}]|\\{[^{}]*\\})*\\}")
  
  # Guard against a reply with no JSON object at all; without this check
  # `fromJSON(NA)` fails with an unrelated, confusing error downstream.
  if (is.na(json_str)) {
    stop("sentiment_deepseek: no JSON object found in model response.",
         call. = FALSE)
  }
  
  # Undo common escaping artifacts before parsing.
  json_str <- gsub("\\\\\"", "\"", json_str)
  json_str <- gsub("\\\\n", " ", json_str)
  
  # Now safely parse it
  return(jsonlite::fromJSON(json_str))
}