Commit 72aa58a7 authored by Sarish's avatar Sarish
Browse files

Small errors have been fixed and everything works fine except the correlation function

(Q3)
parent 24a18d38
Loading
Loading
Loading
Loading
+10 −12
Original line number Diff line number Diff line
@@ -27,9 +27,9 @@ library(stats)
library(lubridate)

# Loading the necessary datasets (.RData files from the project's 01_data folder)
# NOTE(review): the same three files are loaded from two different absolute
# locations (D:/... and C:/...); presumably only one of them exists on a given
# machine -- confirm which set is current and consider project-relative paths.
load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData")
load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/rev.summary.RData")
load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/reviews.clean.RData")
load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData")
load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/raw/rev.summary.RData")
load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/reviews.clean.RData")


# Only keeping necessary columns for analysis
@@ -38,7 +38,7 @@ rev.sentiment <- data.frame(id= reviews.clean$recommendationid, reviews = revie
# Manual sentiment analysis-----------------------------------------------------

# Loading the necessary word lists, taken from Kaggle (by Hu and Bing Liu)
# The working directory is switched to the 00_docs folder so that the two
# word-list files below can be read by their bare file names.
# NOTE(review): two setwd() calls with different absolute paths follow;
# presumably only one is valid on a given machine -- confirm.
setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/00_docs")
setwd("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/00_docs")

# Each file is read into a character vector with one element per line
positive.words <- readLines("positive-words.txt")
negative.words <- readLines("negative-words.txt")
@@ -245,12 +245,13 @@ print(avg.sent.score)

# Review score taken from the revsummary object, which was downloaded in the
# 1st assignment
overall.review.score <- revsummary$review_score
print(overall.review.score)

# Calculating correlation between an average sentiment score and overall review score
correlation <- cor(avg.sent.score, overall.review.score)
# NOTE(review): the same correlation call appears again below, commented out;
# the commit message reports that the correlation step does not work yet.
# Calculating correlation between an average sentiment score and overall review score
# correlation <- cor(avg.sent.score, overall.review.score)

# Printing the correlation coefficient
print(paste("Correlation between average sentiment and overall score:", correlation))
# print(paste("Correlation between average sentiment and overall score:", correlation))

# Q4 Sentiment variation -------------------------------------------------------

@@ -281,15 +282,12 @@ ggplot(data = data.frame(date = as.Date(names(total.score.date)),
# Saving the plot as a JPEG file (15 cm x 15 cm at 1600 dpi)
# NOTE(review): hard-coded absolute path to one user's Desktop; this only
# works on a machine where that folder exists.
ggsave(file= "C:/Users/hetvi/OneDrive/Desktop/Sentiment variation.jpg", 
       width=15, height=15, units = "cm", dpi=1600)

# Checking on the date with the highest sentiment
# NOTE(review): the next assignment looks truncated ("which.ma"); high.index
# is reassigned with which.max() a few lines further down.
high.index <- which.ma

# Finding the index of the entry with the highest total sentiment score
high.index <- which.max(total.score.date$total_sentiment)

# Getting the corresponding row of total.score.date
# (the date and its total sentiment score are read from it below)
high.count <- total.score.date[high.index,]
print(high.count)

# Printing the result: the highest total sentiment score and its date
# (date formatted as YYYY-MM-DD)
cat("A highest sentiment score", high.count$total_sentiment, "was recorded on", format(high.count$date, "%Y-%m-%d"))
cat("A highest sentiment score of", high.count$total_sentiment, "was recorded on", format(high.count$date, "%Y-%m-%d"))