Minute errors found and corrected. (270d1cdb) · Commits · Hetvi Ariwala / IntroADS_Ass2_Team18

02_code/R/Script 2 - Sentiment analysis.R

+20 −10

Original line number	Diff line number	Diff line
		@@ -19,6 +19,7 @@ if(!require("dyplr")) install.packages("dyplr")
		# Install whichever of these packages is not yet available. require()
		# attaches a package that is already installed and returns FALSE otherwise,
		# in which case the package is installed.
		invisible(lapply(
		  c("lpSolve", "irr", "lubridate", "xtable"),
		  function(pkg) {
		    if (!require(pkg, character.only = TRUE)) install.packages(pkg)
		  }
		))

		# Loading of packages
		library(syuzhet)
		@@ -27,13 +28,15 @@ library(tm)
		# Attach the remaining packages used by the script.
		# Package names are case-sensitive: the solver package is "lpSolve"
		# (matching the install.packages("lpSolve") call), not "lpsolve".
		library(RColorBrewer)
		library(ggplot2)
		library(dplyr)
		library(lpSolve)
		library(irr)
		library(lubridate)
		library(xtable)

		# Loading the necessary datasets.
		# NOTE(review): the same three .RData files are loaded twice, once from a
		# home-relative path (~) and once from an absolute D:/ path. Presumably only
		# one of the two sets exists on a given machine -- confirm which is intended.
		load("~/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData")
		load("~/ADS/introads_ass2_team18/01_data/raw/rev.summary.RData")
		load("~/ADS/introads_ass2_team18/01_data/reviews.clean.RData")
		load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData")
		load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/rev.summary.RData")
		load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/reviews.clean.RData")


		# Only keeping necessary columns for analysis
		@@ -43,7 +46,7 @@ rev.sentiment <- data.frame(id= reviews.clean$recommendationid,
		# Manual sentiment analysis-----------------------------------------------------

		# Loading the necessary word lists by Hu and Bing Liu, taken from Kaggle.
		# NOTE(review): readLines() below uses relative file names, so it depends on
		# the working directory set here. Two machine-specific setwd() calls are
		# listed -- confirm which one applies.
		setwd("~/ADS/introads_ass2_team18/00_docs")
		setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/00_docs")

		positive.words <- readLines("positive-words.txt")
		negative.words <- readLines("negative-words.txt")
		@@ -129,16 +132,14 @@ rev.sentiment$label.afinn <- ifelse(rev.sentiment$score.afinn >= 1, "Positive",
		# Sentiment score using the NRC dictionary
		rev.sentiment$score.nrc <- get_nrc_sentiment(rev.sentiment$reviews)

		# Calculating the correlation between the scores of the dictionaries
		# (manual, Bing and AFINN columns)
		correlation_matrix <- cor(rev.sentiment[c("score.manual", "score.bing", "score.afinn")])

		# Creating a LaTeX-formatted table of the correlation matrix
		latex_table <- xtable(correlation_matrix, caption = "Correlation Matrix")

		# Printing the LaTeX code, including the row names
		print(latex_table, include.rownames = TRUE)

		# Saving the scored reviews.
		# NOTE(review): written to two machine-specific locations (~ and D:/) --
		# confirm whether both are needed.
		save(rev.sentiment,file = "~/ADS/introads_ass2_team18/01_data/rev.sentiment.RData")
		save(rev.sentiment,file = "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/rev.sentiment.RData")

		# Emotion analysis---------------------------------------------------

		# Extracting the NRC emotion scores for each term
		@@ -175,7 +176,7 @@ ggplot(nrc.scores, aes(x = emotion, y = score, fill = emotion)) +
		axis.text.x = element_text(angle = 45, hjust = 1, size = 8, face = "bold"), # Bolds the x-axis text
		plot.margin = margin(25, 25, 25, 25), # right margin for legend
		legend.key.size = unit(0.5, "cm")) # size adjust for legend-key
		ggsave(file = "~/ADS/introads_ass2_team18/03_report/graphs/Emotions.jpg", width = 15,
		# Save the emotion plot as a 15 x 15 cm JPEG.
		# The drive letter must be followed by a slash: "D:Hohenheim/..." is resolved
		# relative to the current directory on drive D:, not from the drive root,
		# and would not match the other "D:/Hohenheim/..." paths in this script.
		ggsave(filename = "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/03_report/graphs/Emotions.jpg", width = 15,
		       height = 15, units = "cm", dpi = 1600)


		@@ -197,6 +198,9 @@ cat("An emotion with lowest weightage is disgust with",
		# Attach the game review details to every scored review: all rows of
		# rev.sentiment are kept (all.x = TRUE) and matched on the review id.
		rev.merged <- merge(
		  x = rev.sentiment,
		  y = gamereviews,
		  by.x = "id",
		  by.y = "recommendationid",
		  all.x = TRUE
		)

		# Saving the merged dataset, as it is the primary dataset for further analysis.
		save(
		  rev.merged,
		  file = "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/rev.merged.RData"
		)

		# Due to the length of the 'rev.merged' dataframe, a new dataframe was derived to
		#facilitate efficient analysis. This new dataframe selectively extracts essential columns
		#and information, streamlining the analysis process.
		@@ -243,6 +247,8 @@ cat("A Percentage of the relevance between manual sentiment score and

		# Adding the votes_up column to check on the helpfulness of each review.
		# NOTE(review): the column is copied by position, so this assumes
		# sent.compare and rev.merged hold the same rows in the same order -- confirm.
		sent.compare$votes_up <- rev.merged$votes_up

		# Counting how many reviews fall under each manual sentiment label
		sent.count <- table(sent.compare$label.manual)

		# Displaying the counts
		@@ -298,6 +304,10 @@ sent.compare$date <- as.Date(sent.compare$date)
		# Adding the manual sentiment score column from rev.merged to sent.compare
		sent.compare$score.manual <- rev.merged$score.manual

		# Saving sent.compare, the smaller data frame extracted from rev.merged
		# (which is too large to work with conveniently).
		save(sent.compare,file = "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/sent.compare.RData")

		# Aggregating total sentiment scores by date (sum of score.manual per date)
		total.score.date <- tapply(sent.compare$score.manual, sent.compare$date, sum)

		@@ -312,7 +322,7 @@ ggplot(data = total.score.date) +
		labs(x = "Date",
		y = "Total Sentiment Score") +
		theme_minimal()
		ggsave(file= "~/ADS/introads_ass2_team18/03_report/graphs/Sentiment variation.jpg",
		# Save the sentiment-over-time plot as a 15 x 15 cm JPEG.
		# The drive letter must be followed by a slash: "D:Hohenheim/..." is resolved
		# relative to the current directory on drive D:, not from the drive root,
		# and would not match the other "D:/Hohenheim/..." paths in this script.
		ggsave(filename = "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/03_report/graphs/Sentiment variation.jpg",
		       width = 15, height = 15, units = "cm", dpi = 1600)

		# Finding the index of the review with the highest helpful votes