Commit 270d1cdb authored by Hetvi Ariwala's avatar Hetvi Ariwala
Browse files

Minute errors found and corrected.

parent 4a1691d1
Loading
Loading
Loading
Loading
+20 −10
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@ if(!require("dplyr")) install.packages("dplyr")
if(!require("lpSolve")) install.packages("lpSolve")
if(!require("irr")) install.packages("irr")
if(!require("lubridate")) install.packages("lubridate")
if(!require("xtable")) install.packages("xtable")

# Loading of packages
library(syuzhet)
@@ -27,13 +28,15 @@ library(tm)
library(RColorBrewer)
library(ggplot2)
library(dplyr)
library(lpSolve)  # package names are case-sensitive; matches the install guard for "lpSolve"
library(irr)
library(lubridate)
library(xtable)

#loading necessary dataset
load("~/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData")
load("~/ADS/introads_ass2_team18/01_data/raw/rev.summary.RData")
load("~/ADS/introads_ass2_team18/01_data/reviews.clean.RData")
load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData")
load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/rev.summary.RData")
load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/reviews.clean.RData")


# Only keeping necessary columns for analysis
@@ -43,7 +46,7 @@ rev.sentiment <- data.frame(id= reviews.clean$recommendationid,
# Manual sentiment analysis-----------------------------------------------------

# Loading the necessary word lists (by Hu and Bing Liu, taken from Kaggle)
setwd("~/ADS/introads_ass2_team18/00_docs")
setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/00_docs")

positive.words <- readLines("positive-words.txt")
negative.words <- readLines("negative-words.txt")
@@ -129,16 +132,14 @@ rev.sentiment$label.afinn <- ifelse(rev.sentiment$score.afinn >= 1, "Positive",
# Sentiment score using nrc dictionary
rev.sentiment$score.nrc <- get_nrc_sentiment(rev.sentiment$reviews)

# Calculating the correlation between the results of the dictionaries
correlation_matrix <- cor(rev.sentiment[c("score.manual", "score.bing", "score.afinn")])

# Creating LaTeX-formatted table
latex_table <- xtable(correlation_matrix, caption = "Correlation Matrix")

# Printing the LaTeX code
print(latex_table, include.rownames = TRUE)

save(rev.sentiment,file = "~/ADS/introads_ass2_team18/01_data/rev.sentiment.RData")
save(rev.sentiment,file = "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/rev.sentiment.RData")

# Emotion analysis---------------------------------------------------

# Extracting the NRC emotion scores for each term
@@ -175,7 +176,7 @@ ggplot(nrc.scores, aes(x = emotion, y = score, fill = emotion)) +
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8, face = "bold"),  # Bolds the x-axis text
    plot.margin = margin(25, 25, 25, 25),  # right margin for legend
    legend.key.size = unit(0.5, "cm")) # size adjust for legend-key
ggsave(file = "~/ADS/introads_ass2_team18/03_report/graphs/Emotions.jpg", width = 15, 
ggsave(file = "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/03_report/graphs/Emotions.jpg", width = 15, 
       height = 15, units = "cm", dpi = 1600)


@@ -197,6 +198,9 @@ cat("An emotion with lowest weightage is disgust with",
rev.merged <- merge(rev.sentiment, gamereviews, by.x = "id", 
                    by.y = "recommendationid", all.x = TRUE)

# Saving the dataset, as it is the primary dataset for further analysis.
save(rev.merged,file = "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/rev.merged.RData")

# Due to the length of the 'rev.merged' dataframe, a new dataframe was derived to 
#facilitate efficient analysis. This new dataframe selectively extracts essential columns
#and information, streamlining the analysis process.
@@ -243,6 +247,8 @@ cat("A Percentage of the relevance between manual sentiment score and

# Adding another column of votes_up to check on the helpfulness of the review
sent.compare$votes_up <- rev.merged$votes_up

# Counting the number of sentiments
sent.count <- table(sent.compare$label.manual)

# Displaying the counts
@@ -298,6 +304,10 @@ sent.compare$date <- as.Date(sent.compare$date)
# adding a column of sent score from rev.merged into sent.compare
sent.compare$score.manual <- rev.merged$score.manual

# Saving the dataset: sent.compare was extracted from the rev.merged data frame, which is too large.
# Hence, the smaller sent.compare data frame was created for convenience.
save(sent.compare,file = "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/sent.compare.RData")

# Aggregating total sentiment scores by date
total.score.date <- tapply(sent.compare$score.manual, sent.compare$date, sum)

@@ -312,7 +322,7 @@ ggplot(data = total.score.date) +
  labs(x = "Date",
       y = "Total Sentiment Score") +
  theme_minimal()
ggsave(file= "~/ADS/introads_ass2_team18/03_report/graphs/Sentiment variation.jpg", 
ggsave(file= "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/03_report/graphs/Sentiment variation.jpg", 
       width=15, height=15, units = "cm", dpi=1600)

# Finding the index of the review with the highest helpful votes