Commit 24a18d38 authored by Hetvi Ariwala's avatar Hetvi Ariwala
Browse files

Necessary tasks have been completed along with some additional analysis on user sentiments.

parent b2dd3d55
Loading
Loading
Loading
Loading
+64 −4
Original line number Diff line number Diff line
@@ -9,6 +9,9 @@ install.packages("ggplot2")
# dplyr is the package loaded with library(dplyr) further down; the former
# spelling "dyplr" does not exist on CRAN, so that install always failed.
install.packages("dplyr")
install.packages("lpSolve")
install.packages("irr")
# stats ships with every R installation and cannot be installed from CRAN,
# so no install.packages() call is needed for it.
install.packages("lubridate")



# Loading of packages
@@ -20,12 +23,12 @@ library(ggplot2)
library(dplyr)
library(lpSolve)
library(irr)


library(stats)
library(lubridate)

# Restoring the saved data objects the analysis relies on, in a fixed order.
# NOTE(review): every path is absolute and machine-specific (D:/ drive), so
# the script only runs where these folders exist -- consider relative paths.
# NOTE(review): the second file carries a .csv extension and lives in the
# assignment-1 folder; load() only reads R's saved-object format, so confirm
# that it really is an RData file and that it is still needed next to
# rev.summary.RData.
data.files <- c(
  "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData",
  "D:/Hohenheim/SEM 3/ADS/introads_ass1_team18-main/01_data/raw/revsummary.RData.csv",
  "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/rev.summary.RData",
  "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/reviews.clean.RData"
)
for (data.file in data.files) {
  load(data.file)
}


@@ -233,3 +236,60 @@ cat("A review with the highest helpfulness is", max.helpfulness.review$sent.labe
# all types of sentiment analysis, and its sentiment is neutral in all of them.

# Q3 

# Relating the manual sentiment scores to the review scores

# Average sentiment score of the reviews from the manual sentiment analysis
# (reviews without a score are ignored)
avg.sent.score <- mean(rev.sentiment$sent.score, na.rm = TRUE)
print(avg.sent.score)

# Review score from the revsummary data downloaded in the 1st assignment
overall.review.score <- revsummary$review_score

# A correlation needs paired observations. avg.sent.score is one number, so
# cor(avg.sent.score, overall.review.score) either stops with "incompatible
# dimensions" or returns NA. The per-review sentiment scores are correlated
# instead, and only when revsummary supplies one score per review.
# NOTE(review): pairing is by row position -- confirm that rev.sentiment and
# revsummary list the same reviews in the same order.
sent.scores <- rev.sentiment$sent.score
if (length(overall.review.score) == length(sent.scores) &&
    length(sent.scores) > 1) {
  correlation <- cor(sent.scores, overall.review.score,
                     use = "pairwise.complete.obs")
} else {
  warning("Correlation not computed: revsummary$review_score has ",
          length(overall.review.score), " value(s) but there are ",
          length(sent.scores), " sentiment scores to pair them with.",
          call. = FALSE)
  correlation <- NA_real_
}

# Printing the correlation coefficient
print(paste("Correlation between average sentiment and overall score:", correlation))

# Q4 Sentiment variation -------------------------------------------------------

# Copying the review date and the sentiment score from rev.merged into
# sent.compare. The columns are matched by row position, so both data frames
# must have the same number of rows; the check prevents a shorter column from
# being recycled silently.
stopifnot(nrow(sent.compare) == nrow(rev.merged))

# Review date, converted to class Date
# NOTE(review): as.Date() assumes timestamp_created is already a Date/POSIXct
# value or an ISO date string; if it holds Unix seconds it has to be converted
# with as.POSIXct(..., origin = "1970-01-01") first -- confirm.
sent.compare$date <- as.Date(rev.merged$timestamp_created)

# Sentiment score of each review
sent.compare$sent.score <- rev.merged$sent.score

# Total sentiment score per date; missing scores are skipped so that a single
# unscored review does not turn the whole day's total into NA
score.by.date <- tapply(sent.compare$sent.score, sent.compare$date, sum,
                        na.rm = TRUE)

# One row per date: the date itself and the summed sentiment score
total.score.date <- data.frame(date = as.Date(names(score.by.date)),
                               total_sentiment = as.numeric(score.by.date))


# Plotting the total sentiment score per date over time.
# total.score.date is a data frame with the columns `date` and
# `total_sentiment` at this point, so it is handed to ggplot() as it is;
# rebuilding it with names()/as.numeric() fails on a data frame.
sentiment.plot <- ggplot(data = total.score.date,
                         aes(x = date, y = total_sentiment)) +
  geom_line(color = "blue") +
  labs(title = "Sentiment Variation Over Time",
       x = "Date",
       y = "Total Sentiment Score") +
  theme_minimal()

# Drawing the plot explicitly so it also shows when the script is sourced
print(sentiment.plot)

# Saving exactly this plot instead of relying on the last plot displayed
ggsave(filename = "C:/Users/hetvi/OneDrive/Desktop/Sentiment variation.jpg",
       plot = sentiment.plot,
       width = 15, height = 15, units = "cm", dpi = 1600)

# Checking on which date the total sentiment score was highest

# Row index of the date with the highest total sentiment score
high.index <- which.max(total.score.date$total_sentiment)

# Getting the corresponding row (date and total sentiment score)
high.count <- total.score.date[high.index, ]
print(high.count)

# Printing the result
cat("A highest sentiment score", high.count$total_sentiment, "was recorded on", format(high.count$date, "%Y-%m-%d"))