Q2 has been done. (4580251f) · Commits · Hetvi Ariwala / IntroADS_Ass2_Team18

02_code/R/Script 2 - Sentiment analysis.R

+65 −26

Original line number	Diff line number	Diff line
		rm(list = ls())

		# Installation of packages
		# Only packages that are not yet present in the library are installed, so
		# re-running the script does not download everything again.
		required.packages <- c("stringr", "syuzhet", "NLP", "tm", "RColorBrewer",
		                       "ggplot2", "dplyr", "irr", "lpSolve")
		# system.file() returns "" for a package that cannot be found
		is.installed <- vapply(required.packages,
		                       function(pkg) nzchar(system.file(package = pkg)),
		                       logical(1))
		# install.packages() must not be called with an empty vector
		if (!all(is.installed)) {
		  install.packages(required.packages[!is.installed])
		}


		# Loading of packages
		# Each package is attached exactly once, in the original order.
		library(stringr)
		library(syuzhet)
		library(NLP)
		library(tm)
		library(RColorBrewer)
		library(ggplot2)
		library(dplyr)
		library(irr)
		library(lpSolve)



		# Loading necessary datasets
		# The project folder lives in a different place on each team member's machine.
		# The first location that exists is used, instead of failing on the paths
		# that belong to somebody else's computer.
		project.roots <- c("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18",
		                   "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18")
		project.root <- project.roots[dir.exists(project.roots)][1]
		if (is.na(project.root)) {
		  stop("Project folder not found; add its location to project.roots", call. = FALSE)
		}

		load(file.path(project.root, "01_data", "reviews.clean.RData"))
		load(file.path(project.root, "01_data", "raw", "gamereviews.RData"))

		# Summary file from assignment 1; it is only available on one machine.
		# NOTE(review): the ".csv" suffix suggests this may not be an R workspace
		# image - confirm that load() is the right reader for it.
		revsummary.path <- "D:/Hohenheim/SEM 3/ADS/introads_ass1_team18-main/01_data/raw/revsummary.RData.csv"
		if (file.exists(revsummary.path)) {
		  load(revsummary.path)
		}


		# Only keeping necessary columns for analysis
		rev.sentiment <- data.frame(id = reviews.clean$recommendationid, reviews = reviews.clean$reviews)
		@@ -33,7 +35,7 @@ rev.sentiment <- data.frame(id= reviews.clean$recommendationid, reviews = revie
		# Manual sentiment analysis-----------------------------------------------------

		# Loading of necessary word lists taken from Kaggle (opinion lexicon by Hu and Liu)
		# The docs folder sits in a different place on each machine; the first
		# location that exists is used so the script does not stop on a foreign path.
		docs.dirs <- c("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/00_docs",
		               "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/00_docs")
		docs.dir <- docs.dirs[dir.exists(docs.dirs)][1]
		if (is.na(docs.dir)) {
		  stop("00_docs folder not found; add its location to docs.dirs", call. = FALSE)
		}
		# The working directory is still switched, as before, in case later steps rely on it
		setwd(docs.dir)

		positive.words <- readLines("positive-words.txt")
		negative.words <- readLines("negative-words.txt")
		@@ -116,24 +118,25 @@ rev.sentiment$label.afinn <- ifelse(rev.sentiment$score.afinn >= 1, "Positive",
		# Sentiment score using nrc dictionary
		rev.sentiment$score.nrc <- get_nrc_sentiment(rev.sentiment$reviews)


		# Emotion analysis---------------------------------------------------

		# Total score per NRC category, summed over all reviews (missing values ignored)
		nrc.totals <- colSums(rev.sentiment$score.nrc, na.rm = TRUE)

		# One row per category, sorted from the highest to the lowest total
		nrc.scores <- data.frame(emotion = names(nrc.totals), score = nrc.totals)
		nrc.scores <- nrc.scores[order(-nrc.scores$score), ]

		# Fixing the factor levels to the sorted order
		# (presumably so the bar plot below keeps that order)
		nrc.scores$emotion <- factor(nrc.scores$emotion, levels = nrc.scores$emotion)

		# Hand-picked palette of ten light hex colours
		custom_palette <- c("#a1d99b", "#f03b20", "#FFC3A0", "#FF677D", "#D4A5A5", "#fec44f", "#e6550d", "#fc9272", "#9ecae1", "#bdbdbd")

		# Bar plotting of emotions with values
		@@ -165,32 +168,68 @@ cat("An emotion with lowest weightage is disgust with", round(nrc.scores$weighta

		# Q1 Checking the consistency of sentiment with positive and negative voted_up------------------------------------------------

		# Joining the sentiment results with gamereviews by review id;
		# all.x = TRUE keeps every scored review even without a match
		rev.merged <- merge(rev.sentiment, gamereviews, by.x = "id", by.y = "recommendationid", all.x = TRUE)

		# Another data frame to particularly check on sentiment consistency
		sent.compare <- data.frame(id = rev.merged$id, reviews = rev.merged$reviews,
		                           sent.label = rev.merged$sent.label, voted_up = rev.merged$voted_up)

		# Translating the recommendation flag into the wording of the sentiment labels
		sent.compare <- sent.compare %>%
		  mutate(voted_up_mapped = case_when(
		    voted_up == TRUE ~ "Positive",
		    voted_up == FALSE ~ "Negative"))


		# Converting 'voted_up' to a factor for better comparison
		sent.compare$voted_up <- as.factor(sent.compare$voted_up)

		# Creating a confusion matrix
		# Both sides get the same set of levels so the table is square and its
		# diagonal really holds the matching pairs, even when a third label
		# (e.g. a neutral one) is present.
		# NOTE(review): assumes sent.label is spelled "Positive"/"Negative" like
		# the other label columns - confirm.
		sent.levels <- sort(unique(c(as.character(sent.compare$sent.label), "Negative", "Positive")))
		conf.matrix <- table(
		  sent.label = factor(sent.compare$sent.label, levels = sent.levels),
		  voted_up = factor(sent.compare$voted_up_mapped, levels = sent.levels))
		print(conf.matrix)

		# Calculating a percentage agreement
		# table() leaves out pairs with a missing value, so they are not counted here
		total.obs <- sum(conf.matrix)
		correct.agreement <- sum(diag(conf.matrix))
		percentage.agreement <- correct.agreement / total.obs * 100


		# Printing result
		cat("Percentage Agreement:", percentage.agreement, "%\n")

		# Q2

		# Counting how many reviews carry each sentiment label
		sent.count <- table(sent.compare$sent.label)

		# Displaying the counts
		print(sent.count)

		# Adding the votes_up column to check on the helpfulness of the review
		sent.compare$votes_up <- rev.merged$votes_up

		# Average helpfulness per sentiment label (missing votes ignored)
		avg.sent <- with(sent.compare, tapply(votes_up, sent.label, mean, na.rm = TRUE))

		# Print results
		print(avg.sent)

		# Row position of the review with the most helpful votes, and the review itself
		max.help.index <- which.max(sent.compare$votes_up)
		max.helpfulness.review <- sent.compare[max.help.index, ]

		# Printing a message for the review with the highest helpfulness
		cat("A review with the highest helpfulness is", max.helpfulness.review$sent.label,
		    "sentiment and its' helpfulness rank is", max.helpfulness.review$votes_up,
		    "which is at", max.help.index)

		# Cross-checking this review in the rev.merged data frame showed that every
		# type of sentiment analysis labels it as neutral.

		# Q3
		No newline at end of file