Analysis on Q1 has been done. (93978a21) · Commits · Hetvi Ariwala / IntroADS_Ass2_Team18

02_code/R/Script 2 - Sentiment analysis.R

+33 −24

Original line number	Diff line number	Diff line
		@@ -8,9 +8,10 @@ install.packages("tm")
		# One-time installation of the remaining packages loaded further below.
		# Note: the data-manipulation package is spelled "dplyr" (it is loaded
		# with library(dplyr)); a misspelled name makes install.packages() fail.
		install.packages("RColorBrewer")
		install.packages("ggplot2")
		install.packages("dplyr")
		install.packages("irr")
		install.packages("lpSolve")

		# Loading of packages
		library(tidytext)
		library(stringr)
		library(syuzhet)
		library(NLP)
		@@ -18,11 +19,13 @@ library(tm)
		library(RColorBrewer)
		library(ggplot2)
		library(dplyr)
		library(irr)
		library(lpSolve)


		#loading necessary dataset
		load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/reviews.clean.RData")
		load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData")
		load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/reviews.clean.RData")
		load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData")

		# Only keeping necessary columns for analysis
		rev.sentiment <- data.frame(id= reviews.clean$recommendationid, reviews = reviews.clean$reviews)
		@@ -30,12 +33,12 @@ rev.sentiment <- data.frame(id= reviews.clean$recommendationid, reviews = revie
		# Manual sentiment analysis-----------------------------------------------------

		# Loading the necessary word lists (opinion lexicon by Hu and Bing Liu, taken from Kaggle)
		setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/00_docs")
		setwd("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/00_docs")

		positive.words <- readLines("positive-words.txt")
		negative.words <- readLines("negative-words.txt")

		# Function to perform sentiment analysis on a single review.sent=sentiment
		# Function to perform sentiment analysis on a single review. (sent=sentiment)
		sent.analysis <- function(review) {

		# Tokenize the review into words
		@@ -101,7 +104,7 @@ percentage.identical <- mean(rev.sentiment$identical.score) * 100
		# Display the result
		cat("Percentage of Identical Score:", round(percentage.identical,2), "%\n")

		# Sentiment analysis by using other two dictionaries from syuhet package--------
		# Sentiment analysis by using other two dictionaries from syuzhet package--------

		# sentiment score using AFINN dictionary
		rev.sentiment$score.afinn <- get_sentiment(rev.sentiment$reviews, method = "afinn")
		@@ -149,21 +152,22 @@ ggplot(nrc.scores, aes(x = emotion, y = score, fill = emotion)) +
		axis.text.x = element_text(angle = 45, hjust = 1, size = 8, face = "bold"), # Bolds the x-axis text
		plot.margin = margin(25, 25, 25, 25), # right margin for legend
		legend.key.size = unit(0.5, "cm")) # size adjust for legend-key
		ggsave(file= "C:/Users/hetvi/OneDrive/Desktop/Emotions.jpg",
		ggsave(file= "C:/Users/akluj/OneDrive/Desktop/Emotions.jpg",
		width=15, height=15, units = "cm", dpi=1600)

		# Calculating weightage of enotions
		# Calculating weightage of emotions
		nrc.scores <- nrc.scores %>%
		mutate(weightage = score / sum(score) *100)

		cat("An emotion with highest weightage is positive with", round(nrc.scores$weightage[1],2),"%\n")

		cat("An emotion with highest weightage is disgust with", round(nrc.scores$weightage[10],2),"%\n")
		cat("An emotion with lowest weightage is disgust with", round(nrc.scores$weightage[10],2),"%\n")

		# Q1 Checking the consistency of sentiment with positive and negative voted_up------------------------------------------------

		merged <- merge(rev.sentiment, gamereviews, by.x = "id", by.y = "recommendationid", all.x = TRUE)

		# Checking the consistency of sentiment with positive and negative voted_up

		merged <- merge(sent.label= rev.sentiment$sent.label, voted_up= gamereviews$voted_up, by.x = "id",
		by.y = "recommendationid", all.x = TRUE)
		comparison <- data.frame(id = merged$id, reviews = merged$reviews, sent.label=merged$sent.label, voted_up=merged$voted_up)

		comparison <- comparison %>%
		@@ -171,17 +175,22 @@ comparison <- comparison %>%
		voted_up==TRUE ~ "Positive",
		voted_up==FALSE ~ "Negative"))

		comparison$compare <- ifelse(comparison$sent.label == "Positive" & comparison$voted_up_mapped == "Positive", "positive",
		ifelse(comparison$sent.label == "Negative" & comparison$voted_up_mapped == "Negative", "positive",
		"Neutral"))

		# Counting the number of match
		match.count <- sum(comparison$compare=="Match")
		mismatch.count <- sum(comparison$compare=="Mismatch")
		# Converting 'voted_up' to a factor for better comparison
		comparison$voted_up <- as.factor(comparison$voted_up)

		total.count <- length(comparison$compare)
		# Creating a confusion matrix: rows = dictionary-based sentiment label,
		# columns = the reviewer's vote translated onto the same label scale.
		# voted_up may be logical or a factor with levels "FALSE"/"TRUE" at this
		# point; as.character() + as.logical() handles both and keeps NA as NA.
		vote_label <- ifelse(as.logical(as.character(comparison$voted_up)),
		                     "Positive", "Negative")
		conf_matrix <- table(sent.label = comparison$sent.label, voted_up = vote_label)
		print(conf_matrix)

		# Calculate percentage agreement
		# The agreement is counted by comparing the labels directly instead of
		# using diag(conf_matrix): diag() only equals "agreement" when the table
		# is square with identically ordered rows and columns. If sent.label has
		# a third category (e.g. "Neutral"), diag() would silently add up the
		# wrong cells.
		total_obs <- sum(conf_matrix)
		correct_agreement <- sum(as.character(comparison$sent.label) == vote_label,
		                         na.rm = TRUE)
		percentage_agreement <- correct_agreement / total_obs * 100


		# Printing result
		cat("Percentage Agreement:", percentage_agreement, "%\n")

		# Q2

		# Calculating the ratio of matches to all compared reviews
		match.ratio <- match.count / total.count
		# Display the result
		print(comparison_result)