Commit ed95d015 authored by Augustine Mensah's avatar Augustine Mensah
Browse files

initial changes

parents
Loading
Loading
Loading
Loading

00_docs/Assignment.pdf

0 → 100644
+240 KiB

File added.

No diff preview for this file type.

+191 KiB

File added.

No diff preview for this file type.

+77.8 KiB

File added.

No diff preview for this file type.

+102 −0
Original line number Diff line number Diff line
#*******************************************************************************
#* Loop to collect more game reviews
#*
#*******************************************************************************
#*

# Clear workspace.
# NOTE(review): `rm(list = ls())` in scripts is discouraged (it wipes the
# caller's session) -- kept here only to preserve the script's original behavior.
rm(list = ls())

# load necessary packages
library(httr)      # HTTP requests (GET, content)
library(jsonlite)  # JSON parsing (fromJSON)
library(lubridate) # Unix timestamps -> date-times (as_datetime)

# Steam App ID to query -- Dota 2 by default; you can change it!
gameid <- 570

# Maximum number of paginated requests (each returns up to 100 reviews).
n <- 100

# Accumulator for all collected review batches.
game.rev <- data.frame()

# establish the base path URL
rev.url1 <- "https://store.steampowered.com/appreviews/"

# establish the URL extension; the pagination cursor is appended to it
rev.url2 <- "?json=1&filter=recent&num_per_page=100&cursor="

for (i in seq_len(n)) {

  # The first request uses the wildcard cursor "*"; every later iteration
  # reuses the cursor extracted from the previous response.
  if (i == 1) {
    cursor <- "*"
  }

  # run the API query for batch i.
  rev.request <- GET(url = paste0(rev.url1, gameid, rev.url2, cursor))

  # Parse the JSON payload once; it is reused below for both the reviews
  # and the pagination cursor (previously the body was downloaded from the
  # response object and parsed a second time just to read the cursor).
  reviews.raw  <- content(rev.request, as = "text", encoding = "UTF-8")
  reviews.list <- fromJSON(reviews.raw)
  reviews      <- reviews.list$reviews

  # unnest the nested author data frame into plain columns
  reviews <- do.call("data.frame", reviews)

  # When Steam runs out of recent reviews the payload no longer carries a
  # 'recommendationid' column; stop querying cleanly in that case instead
  # of aborting with an error.
  if ("recommendationid" %in% names(reviews)) {

    reviews$recommendationid    <- as.character(reviews$recommendationid)
    reviews$weighted_vote_score <- as.numeric(reviews$weighted_vote_score)

    # convert Unix timestamps to date-time (POSIXct) format.
    reviews$timestamp_created  <- as_datetime(reviews$timestamp_created)
    reviews$timestamp_updated  <- as_datetime(reviews$timestamp_updated)
    reviews$author.last_played <- as_datetime(reviews$author.last_played)

    # to request the next n reviews, extract the custom cursor that was
    # provided within the previous request (already parsed above)
    cursor <- reviews.list$cursor

    # some cursors return characters that are not URL encoded.
    # Must replace problem pagination "+" with correct encoding "%2B".
    cursor <- gsub("\\+", "%2B", cursor)

    # Batches can differ in their column sets; keep only the columns common
    # to the accumulated data and the new batch before row-binding.
    if (i > 1 && ncol(game.rev) != ncol(reviews)) {
      vars     <- intersect(names(reviews), names(game.rev))
      game.rev <- game.rev[, names(game.rev) %in% vars]
      reviews  <- reviews[, names(reviews) %in% vars]
    }

    game.rev <- rbind(game.rev, reviews)

  } else {
    # if recommendationid does not exist in the data frame, then break.
    print(paste0("No further recent reviews are available for App ID #",
                 as.character(gameid), ". Query ", as.character(i), " of ",
                 as.character(n), " aborted. No further queries possible."))
    break
  }

  # Progress dots plus short pauses to stay polite to the API.
  for (tick in 1:3) {
    message(".", appendLF = FALSE)
    Sys.sleep(time = 0.5)
  }

}

# Drop exact duplicate rows that can appear across overlapping batches.
game.rev <- game.rev[!duplicated(game.rev), ]

save(game.rev, file = "gamerev.RData")
+66 −0
Original line number Diff line number Diff line
#' Classify the sentiment and NRC emotions of a text via a DeepSeek model.
#'
#' Sends `text`, prefixed by a fixed instruction prompt, to the aidaho
#' Ollama queue endpoint, then extracts the first JSON object from the
#' model's response and parses it.
#'
#' @param text A character string to analyse.
#' @return The parsed JSON answer as a list; per the prompt schema it is
#'   expected to hold `sentiment` (number in [-1, 1]) and `emotions`
#'   (NRC emotion labels), though the exact content depends on the model.
sentiment_deepseek <- function(text){
  # Define the URL of the Ollama queue endpoint
  url <- "https://aidaho-edu.uni-hohenheim.de/ollama/queue"
  
  # here is the base prompt (kept verbatim -- it is part of the model contract)
  base_prompt <- 'You are an advanced AI assistant. You was created to perform sentiment analysis on input text.
                I need you to classify each text you receive and provide your analysis using the following JSON schema:
                {
                "sentiment": {
                "type": "number",
                "description": "A floating-point representation of the sentiment of the text, 
                rounded to two decimal places. Scale ranges from -1.0 (negative) to 1.0 (positive), 
                where 0.0 represents neutral sentiment.",
                "required": true
               },
            "emotions": {
            "type": "array",
            "description": "A list of one or multiple emotions of the NRC dictionary 
            that are most relevant to the text. 
            The possible values are: anger, anticipation, disgust, fear, joy, 
            sadness, surprise, trust. Assign only these emotions!",
            "required": true
            }
        }

          Always respond with a valid JSON object adhering to this schema and never add 
          more to the JSON format. 
          Do not include any other text or messages in your response!
          Exclude markdown. Ignore if the sentence is incomplete or
          contains spelling errors.

        INPUT TEXT is'
  # Append the input text to the instruction prompt
  prompt <- paste(base_prompt, text)
  
  # Construct the request body as a named list; a fixed seed keeps the
  # model output reproducible across calls.
  payload <- list(
    model = "deepseek-r1:32b",
    options = list(seed = 42),
    endpoint = "/api/generate",
    data = list(
      stream = FALSE,
      prompt = prompt
    )
  )
  
  # Make the POST request
  response <- httr::POST(
    url = url,
    add_headers("Content-Type" = "application/json"),
    body = payload,
    encode = "json"
  )
  
  # Fail fast on HTTP errors instead of parsing an error body as an answer.
  httr::stop_for_status(response)
  
  # NOTE: local variable renamed from `content` to avoid shadowing
  # `httr::content`.
  response_text <- httr::content(response, as = "text", encoding = "UTF-8")
  parsed        <- jsonlite::fromJSON(response_text)
  
  # Pull the first balanced JSON object out of the model's free-text reply
  # (the regex allows one level of nesting, enough for the schema above).
  json_str <- stringr::str_extract(parsed$response, "\\{(?:[^{}]|\\{[^{}]*\\})*\\}")
  
  # Guard against a reply with no JSON object at all; without this check
  # `fromJSON(NA)` fails with an unrelated, confusing error downstream.
  if (is.na(json_str)) {
    stop("sentiment_deepseek: no JSON object found in model response.",
         call. = FALSE)
  }
  
  # Undo common escaping artifacts before parsing.
  json_str <- gsub("\\\\\"", "\"", json_str)
  json_str <- gsub("\\\\n", " ", json_str)
  
  # Now safely parse it
  return(jsonlite::fromJSON(json_str))
}