Emotion analysis has been done with plotting. (95dd33a7) · Commits · Hetvi Ariwala / IntroADS_Ass2_Team18

02_code/R/Script 2 - Sentiment analysis.R

+65 −33

Original line number	Diff line number	Diff line
		@@ -5,23 +5,27 @@ install.packages("stringr")
		# One-time setup: install the packages that are attached further down.
		install.packages("syuzhet")
		install.packages("NLP")
		install.packages("tm")
		install.packages("wordcloud")
		install.packages("RColorBrewer")
		install.packages("ggplot2")
		# Package name corrected from "dyplr"; the script attaches dplyr via library().
		install.packages("dplyr")

		# Loading of packages
		library(tidytext)
		library(stringr)
		library(syuzhet)
		library(NLP)
		library(tm)
		library(wordcloud)
		library(RColorBrewer)
		library(ggplot2)
		library(dplyr)


		#loading necessary dataset
		setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data")
		reviews.clean <- read.csv("reviews.clean.csv")
		load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/reviews.clean.RData")
		load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData")

		# Only keeping necessary columns for analysis
		rev.sentiment <- data.frame(reviews = reviews.clean$review)
		rev.sentiment <- data.frame(reviews = reviews.clean$reviews)

		# Manual sentiment analysis-----------------------------------------------------

		@@ -31,7 +35,7 @@ setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/00_docs")
		positive.words <- readLines("positive-words.txt")
		negative.words <- readLines("negative-words.txt")

		# Function to perform sentiment analysis on a single review; in a code, sent=sentiment
		# Function to perform sentiment analysis on a single review ("sent" = sentiment)
		sent.analysis <- function(review) {

		# Tokenize the review into words
		@@ -56,27 +60,27 @@ sent.analysis <- function(review) {
		sent.score <- sent.score * -1
		}

		# Ascribe positive (1), negative (-1), or neutral (0) sentiment
		# sentiment labeling positive (1), negative (-1), or neutral (0)
		sentiment <- ifelse(sent.score > 0, 1, ifelse(sent.score < 0, -1, 0))

		return(sentiment)
		}

		# Apply sentiment analysis to all reviews in the dataframe
		rev.sentiment$sentiment.score <- sapply(rev.sentiment$reviews, sent.analysis)
		rev.sentiment$sent.score <- sapply(rev.sentiment$reviews, sent.analysis)

		# Labeling the sentiment score with sentiment
		rev.sentiment$sentiment.label <- ifelse(rev.sentiment$sentiment.score == 1, "Positive",
		ifelse(rev.sentiment$sentiment.score == -1, "Negative", "Neutral"))
		rev.sentiment$sent.label <- ifelse(rev.sentiment$sent.score == 1, "Positive",
		ifelse(rev.sentiment$sent.score == -1, "Negative", "Neutral"))

		# Counting the total number reviews:
		positive.counts <- sum(rev.sentiment$sentiment.label == "Positive")
		positive.counts <- sum(rev.sentiment$sent.label == "Positive")
		cat("The total number of positive reviews are", positive.counts)

		negative.counts <- sum(rev.sentiment$sentiment.label == "Negative")
		negative.counts <- sum(rev.sentiment$sent.label == "Negative")
		cat("The total number of negative reviews are", negative.counts)

		neutral.counts <- sum(rev.sentiment$sentiment.label == "Neutral")
		neutral.counts <- sum(rev.sentiment$sent.label == "Neutral")
		cat("The total number of neutral reviews are", neutral.counts)

		# Syuzhet sentiment analysis using bing dictionary------------------------------
		@@ -85,17 +89,17 @@ cat("The total number of neutral reviews are", neutral.counts)
		rev.sentiment$score.syuzhet <- get_sentiment(rev.sentiment$reviews, method = "bing")

		# Labeling the sentiment score with sentiment
		rev.sentiment$sent.label.syuzhet <- ifelse(rev.sentiment$score.syuzhet >= 1, "Positive",
		rev.sentiment$label.syuzhet <- ifelse(rev.sentiment$score.syuzhet >= 1, "Positive",
		ifelse(rev.sentiment$score.syuzhet <= -1, "Negative", "Neutral"))

		# Sentiment scores by manually and with syuzhet package and chceking if its identical or not
		rev.sentiment$identical.score <- ifelse(rev.sentiment$sentiment.score == score.syuzhet, 1 , 0)
		# Check, per review, whether the manual analysis and syuzhet (bing) agree.
		# sent.score is already collapsed to -1 / 0 / 1, while score.syuzhet is the
		# raw bing score, so it is reduced to its sign before comparing. Without this,
		# any review whose bing score lies beyond +/-1 would be counted as a mismatch
		# even when both methods assign the same polarity.
		rev.sentiment$identical.score <- ifelse(rev.sentiment$sent.score == sign(rev.sentiment$score.syuzhet), 1, 0)

		# Calculate the percentage of identical scores
		percentage.identical <- mean(rev.sentiment$identical.score) * 100

		# Display the result
		cat("Percentage of Identical Score:", percentage.identical, "%\n")
		cat("Percentage of Identical Score:", round(percentage.identical,2), "%\n")

		# Sentiment analysis using the other two dictionaries from the syuzhet package--------

		@@ -103,29 +107,57 @@ cat("Percentage of Identical Score:", percentage.identical, "%\n")
		rev.sentiment$score.afinn <- get_sentiment(rev.sentiment$reviews, method = "afinn")

		# Labeling sentiment score with sentiment
		rev.sentiment$sent.label.afinn <- ifelse(rev.sentiment$score.afinn >= 1, "Positive",
		rev.sentiment$label.afinn <- ifelse(rev.sentiment$score.afinn >= 1, "Positive",
		ifelse(rev.sentiment$score.afinn <= -1, "Negative", "Neutral"))

		# Sentiment score using nrc dictionary
		rev.sentiment$score.nrc <- get_nrc_sentiment(rev.sentiment$reviews)

		# Wordcloud based on emotions---------------------------------------------------

		reviews <- rev.sentiment$reviews
		# Emotion analysis---------------------------------------------------

		# Extract the NRC emotion scores for each term
		# Assuming 'nrc_scores' is a column containing NRC scores for each review in your rev.sentiment dataframe
		nrc_scores <- rev.sentiment$score.nrc
		# Extracting the NRC emotion scores computed for each review
		nrc.scores <- rev.sentiment$score.nrc

		#Sum the NRC scores for each emotion across all reviews
		aggregate_nrc <- colSums(nrc_scores, na.rm = TRUE)
		nrc.scores <- colSums(nrc.scores, na.rm = TRUE)

		# Convert the aggregated scores to a data frame
		nrc_aggregated_df <- data.frame(emotion = names(aggregate_nrc), score = aggregate_nrc)

		# Remove rows with NAs (if any)
		nrc_aggregated_df <- nrc_aggregated_df[complete.cases(nrc_aggregated_df), ]

		# Create the word cloud
		wordcloud(words = nrc_aggregated_df$emotion, freq = nrc_aggregated_df$score,
		min.freq = 1, scale = c(3, 0.5), colors = brewer.pal(8, "Dark2"))
		nrc.scores <- data.frame(emotion = names(nrc.scores), score = nrc.scores)

		# Order the dataframe by scores in descending order
		nrc.scores <- nrc.scores[order(-nrc.scores$score), ]

		# Define a custom color palette with light colors
		custom_palette <- c("#a1d99b", "#f03b20", "#FFC3A0", "#FF677D", "#D4A5A5", "#fec44f", "#e6550d", "#fc9272", "#9ecae1", "#bdbdbd")

		# Reorder the factor levels of emotion based on scores
		nrc.scores$emotion <- factor(nrc.scores$emotion, levels = nrc.scores$emotion)

		# Bar chart of the aggregated NRC scores: one bar per emotion, filled from
		# custom_palette, with the rounded score printed on each bar.
		ggplot(nrc.scores, aes(x = emotion, y = score, fill = emotion)) +
		# stat = "identity" uses the score column as the bar height as-is
		geom_bar(stat = "identity", color = "black", width = 0.7) +
		# vjust = 1.5 shifts each label down from the top edge of its bar
		geom_text(aes(label = round(score, 2)), vjust = 1.5, color = "black", size = 3) +
		scale_fill_manual(values = custom_palette) +
		labs(title = "Emotion Scores",
		x = "Emotions",
		y = "Scores") +
		theme(legend.position = "bottom",
		legend.box.margin = margin(3, 3, 3, 3),
		plot.title = element_text(hjust = 0.5, face = "bold"), # centered, bold title
		axis.title.x = element_text(face = "bold"), # bold x-axis title
		axis.title.y = element_text(face = "bold"), # bold y-axis title
		axis.text.x = element_text(angle = 45, hjust = 1, size = 8, face = "bold"), # bold x-axis tick labels, rotated 45 degrees
		plot.margin = margin(25, 25, 25, 25), # same margin on all four sides of the plot
		legend.key.size = unit(0.5, "cm")) # size of the legend keys
		# Save the chart as a 15 cm x 15 cm JPEG. No plot argument is passed, so
		# ggsave() falls back to its default plot.
		# NOTE(review): the target is an absolute path on one user's machine, and
		# dpi = 1600 at 15 cm gives roughly 9450 pixels per side - confirm both are intended.
		ggsave(file= "C:/Users/hetvi/OneDrive/Desktop/Emotions.jpg",
		width=15, height=15, units = "cm", dpi=1600)

		# Calculating weightage of emotions: each score as a percentage of the total
		nrc.scores <- nrc.scores %>%
		mutate(weightage = score / sum(score) *100)

		# nrc.scores is ordered by score in descending order, so the first row holds
		# the highest weightage and the last row the lowest. The emotion names are
		# read from the data (emotion is a factor, hence as.character) instead of
		# being hard-coded, so the messages stay correct if the ranking changes.
		lowest.row <- nrow(nrc.scores)

		cat("An emotion with highest weightage is", as.character(nrc.scores$emotion[1]),
		"with", round(nrc.scores$weightage[1], 2), "%\n")

		# The original message said "highest" here although it reports the last row.
		cat("An emotion with lowest weightage is", as.character(nrc.scores$emotion[lowest.row]),
		"with", round(nrc.scores$weightage[lowest.row], 2), "%\n")

		# Checking the consistency of sentiment with positive and negative voted_up