Small errors have been updated and everything works fine besides the correlation function (72aa58a7) · Commits · Hetvi Ariwala / IntroADS_Ass2_Team18

02_code/R/Script 2 - Sentiment analysis.R

+10 −12

Original line number	Diff line number	Diff line
		@@ -27,9 +27,9 @@ library(stats)
		library(lubridate)

		# Loading the necessary datasets
		load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData")
		load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/rev.summary.RData")
		load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/reviews.clean.RData")
		load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData")
		load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/raw/rev.summary.RData")
		load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/reviews.clean.RData")


		# Only keeping necessary columns for analysis
		@@ -38,7 +38,7 @@ rev.sentiment <- data.frame(id= reviews.clean$recommendationid, reviews = revie
		# Manual sentiment analysis-----------------------------------------------------

		# Loading the necessary word lists (opinion lexicon by Minqing Hu and Bing Liu, taken from Kaggle)
		setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/00_docs")
		setwd("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/00_docs")

		positive.words <- readLines("positive-words.txt")
		negative.words <- readLines("negative-words.txt")
		@@ -245,12 +245,13 @@ print(avg.sent.score)

		# Review score taken from the revsummary file, which was downloaded in the 1st assignment
		overall.review.score <- revsummary$review_score
		print(overall.review.score)

		# Calculating correlation betwwen an average sentiment score and overall review score
		correlation <- cor(avg.sent.score, overall.review.score)
		# Calculating correlation between an average sentiment score and overall review score
		# correlation <- cor(avg.sent.score, overall.review.score)

		# Printing the correlation coefficient
		print(paste("Correlation between average sentiment and overall score:", correlation))
		# print(paste("Correlation between average sentiment and overall score:", correlation))

		# Q4 Sentiment variation -------------------------------------------------------

		@@ -281,15 +282,12 @@ ggplot(data = data.frame(date = as.Date(names(total.score.date)),
		ggsave(file= "C:/Users/hetvi/OneDrive/Desktop/Sentiment variation.jpg",
		width=15, height=15, units = "cm", dpi=1600)

		# checking on highest sentiment date
		high.index <- which.ma

		# Finding the index of the entry with the highest total sentiment score
		high.index <- which.max(total.score.date$total_sentiment)

		# Getting the corresponding review
		# Getting the corresponding date
		high.count <- total.score.date[high.index,]
		print(high.count)

		# Printing the result
		cat("A highest sentiment score", high.count$total_sentiment, "was recorded on", format(high.count$date, "%Y-%m-%d"))
		cat("A highest sentiment score of", high.count$total_sentiment, "was recorded on", format(high.count$date, "%Y-%m-%d"))