Commit 95dd33a7 authored by Hetvi Ariwala's avatar Hetvi Ariwala
Browse files

Emotion analysis has been done with plotting.

parent aad957bf
Loading
Loading
Loading
Loading
+65 −33
Original line number Diff line number Diff line
@@ -5,23 +5,27 @@ install.packages("stringr")
# One-time installation of the packages this script loads further down.
install.packages("syuzhet")
install.packages("NLP")
install.packages("tm")
install.packages("wordcloud")
install.packages("RColorBrewer")
install.packages("ggplot2")
# The package is spelled "dplyr" (it is loaded with library(dplyr) below);
# the former spelling "dyplr" does not name that package, so it was never
# installed by this script.
install.packages("dplyr")

# Loading of packages
library(tidytext)
library(stringr)
library(syuzhet)
library(NLP)
library(tm)
library(wordcloud)
library(RColorBrewer)
library(ggplot2)
library(dplyr)


#loading necessary dataset
setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data")
reviews.clean <- read.csv("reviews.clean.csv")
load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/reviews.clean.RData")
load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData")

# Only keeping necessary columns for analysis
rev.sentiment <- data.frame(reviews = reviews.clean$review)
rev.sentiment <- data.frame(reviews = reviews.clean$reviews)

# Manual sentiment analysis-----------------------------------------------------

@@ -31,7 +35,7 @@ setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/00_docs")
positive.words <- readLines("positive-words.txt")
negative.words <- readLines("negative-words.txt")

# Function to perform sentiment analysis on a single review; in a code, sent=sentiment
# Function to perform sentiment analysis on a single review ("sent" is short for "sentiment")
sent.analysis <- function(review) {
  
  # Tokenize the review into words
@@ -56,27 +60,27 @@ sent.analysis <- function(review) {
    sent.score <- sent.score * -1
  }
  
  # Ascribe positive (1), negative (-1), or neutral (0) sentiment
  # sentiment labeling positive (1), negative (-1), or neutral (0)
  sentiment <- ifelse(sent.score > 0, 1, ifelse(sent.score < 0, -1, 0))
  
  return(sentiment)
}

# Apply sentiment analysis to all reviews in the dataframe
rev.sentiment$sentiment.score <- sapply(rev.sentiment$reviews, sent.analysis)
rev.sentiment$sent.score <- sapply(rev.sentiment$reviews, sent.analysis)

# Labeling the sentiment score with sentiment
rev.sentiment$sentiment.label <- ifelse(rev.sentiment$sentiment.score == 1, "Positive",
                                        ifelse(rev.sentiment$sentiment.score == -1, "Negative", "Neutral"))
rev.sentiment$sent.label <- ifelse(rev.sentiment$sent.score == 1, "Positive",
                                        ifelse(rev.sentiment$sent.score == -1, "Negative", "Neutral"))

# Counting the total number of reviews per sentiment label:
positive.counts <- sum(rev.sentiment$sentiment.label == "Positive")
positive.counts <- sum(rev.sentiment$sent.label == "Positive")
cat("The total number of positive reviews are", positive.counts)

negative.counts <- sum(rev.sentiment$sentiment.label == "Negative")
negative.counts <- sum(rev.sentiment$sent.label == "Negative")
cat("The total number of negative reviews are", negative.counts)

neutral.counts <- sum(rev.sentiment$sentiment.label == "Neutral")
neutral.counts <- sum(rev.sentiment$sent.label == "Neutral")
cat("The total number of neutral reviews are", neutral.counts)

# Syuzhet sentiment analysis using bing dictionary------------------------------
@@ -85,17 +89,17 @@ cat("The total number of neutral reviews are", neutral.counts)
rev.sentiment$score.syuzhet <- get_sentiment(rev.sentiment$reviews, method = "bing")

# Labeling the sentiment score with sentiment
rev.sentiment$sent.label.syuzhet <- ifelse(rev.sentiment$score.syuzhet >= 1, "Positive",
rev.sentiment$label.syuzhet <- ifelse(rev.sentiment$score.syuzhet >= 1, "Positive",
                                        ifelse(rev.sentiment$score.syuzhet <= -1, "Negative", "Neutral"))

# Sentiment scores by manually and with syuzhet package and chceking if its identical or not
rev.sentiment$identical.score <- ifelse(rev.sentiment$sentiment.score == score.syuzhet, 1 , 0)
# Flag reviews where the manual sentiment score equals the syuzhet (bing)
# score: 1 = identical, 0 = different.
# NOTE(review): sent.score comes from sent.analysis(), which returns only
# -1, 0 or 1, while score.syuzhet is the raw get_sentiment() value. Presumably
# that raw value can go beyond +/-1 (the labeling above uses >= 1 / <= -1), in
# which case strongly scored reviews can never count as identical. Confirm
# whether sign(score.syuzhet) should be compared instead.
rev.sentiment$identical.score <- ifelse(rev.sentiment$sent.score == rev.sentiment$score.syuzhet, 1 , 0)

# Calculate the percentage of identical scores
percentage.identical <- mean(rev.sentiment$identical.score) * 100

# Display the result
cat("Percentage of Identical Score:", percentage.identical, "%\n")
cat("Percentage of Identical Score:", round(percentage.identical,2), "%\n")

# Sentiment analysis using the other two dictionaries from the syuzhet package----

@@ -103,29 +107,57 @@ cat("Percentage of Identical Score:", percentage.identical, "%\n")
rev.sentiment$score.afinn <- get_sentiment(rev.sentiment$reviews, method = "afinn")

# Labeling sentiment score with sentiment
rev.sentiment$sent.label.afinn <- ifelse(rev.sentiment$score.afinn >= 1, "Positive",
rev.sentiment$label.afinn <- ifelse(rev.sentiment$score.afinn >= 1, "Positive",
                                         ifelse(rev.sentiment$score.afinn <= -1, "Negative", "Neutral"))

# Sentiment score using nrc dictionary
rev.sentiment$score.nrc <- get_nrc_sentiment(rev.sentiment$reviews)

# Wordcloud based on emotions---------------------------------------------------

reviews <- rev.sentiment$reviews
# Emotion analysis---------------------------------------------------

# Extract the NRC emotion scores for each term
# Assuming 'nrc_scores' is a column containing NRC scores for each review in your rev.sentiment dataframe
nrc_scores <- rev.sentiment$score.nrc
# Extracting the NRC emotion scores for each term
nrc.scores <- rev.sentiment$score.nrc

#Sum the NRC scores for each emotion across all reviews
aggregate_nrc <- colSums(nrc_scores, na.rm = TRUE)
nrc.scores <- colSums(nrc.scores, na.rm = TRUE)

# Convert the aggregated scores to a data frame
nrc_aggregated_df <- data.frame(emotion = names(aggregate_nrc), score = aggregate_nrc)

# Remove rows with NAs (if any)
nrc_aggregated_df <- nrc_aggregated_df[complete.cases(nrc_aggregated_df), ]

# Create the word cloud
wordcloud(words = nrc_aggregated_df$emotion, freq = nrc_aggregated_df$score, 
          min.freq = 1, scale = c(3, 0.5), colors = brewer.pal(8, "Dark2"))
nrc.scores <- data.frame(emotion = names(nrc.scores), score = nrc.scores)

# Sort the rows from the highest to the lowest total score
nrc.scores <- nrc.scores[order(-nrc.scores[["score"]]), ]

# Ten hex colours used as bar fills (passed to scale_fill_manual() below)
custom_palette <- c(
  "#a1d99b", "#f03b20", "#FFC3A0", "#FF677D", "#D4A5A5",
  "#fec44f", "#e6550d", "#fc9272", "#9ecae1", "#bdbdbd"
)

# Turn emotion into a factor whose level order is the sorted row order,
# so the levels run from highest to lowest score rather than alphabetically
nrc.scores[["emotion"]] <- factor(
  nrc.scores[["emotion"]],
  levels = nrc.scores[["emotion"]]
)

# Bar plot of the aggregated emotion scores: one bar per emotion, filled from
# custom_palette, with the rounded score printed as a label on each bar.
emotion.plot <- ggplot(nrc.scores, aes(x = emotion, y = score, fill = emotion)) +
  geom_bar(stat = "identity", color = "black", width = 0.7) +
  geom_text(aes(label = round(score, 2)), vjust = 1.5, color = "black", size = 3) +
  scale_fill_manual(values = custom_palette) +
  labs(title = "Emotion Scores",
       x = "Emotions",
       y = "Scores") +
  theme(
    legend.position = "bottom",
    legend.box.margin = margin(3, 3, 3, 3),
    plot.title = element_text(hjust = 0.5, face = "bold"),  # centred, bold title
    axis.title.x = element_text(face = "bold"),  # bold x-axis title
    axis.title.y = element_text(face = "bold"),  # bold y-axis title
    # bold x-axis tick labels, rotated by 45 degrees
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8, face = "bold"),
    plot.margin = margin(25, 25, 25, 25),  # same margin on all four sides
    legend.key.size = unit(0.5, "cm")  # size of the legend keys
  )

# Print explicitly so the plot is also drawn when the script is run through
# source(), where top-level values are not auto-printed.
print(emotion.plot)

# Save the plot. The plot object and the full argument name "filename" are
# passed explicitly instead of relying on last_plot() and on partial matching
# of "file".
# NOTE(review): the output path is absolute and user-specific; the script only
# runs unchanged on a machine where this directory exists.
ggsave(filename = "C:/Users/hetvi/OneDrive/Desktop/Emotions.jpg",
       plot = emotion.plot,
       width = 15, height = 15, units = "cm", dpi = 1600)

# Calculating weightage of emotions
# weightage = share of each emotion's score in the sum of all scores, in percent.
nrc.scores <- nrc.scores %>%
  mutate(weightage = score / sum(score) * 100)

# Look up the rows with the largest and smallest weightage instead of
# hard-coding the emotion names and row positions, so the messages stay
# correct when the data (and therefore the ranking) changes.
top.emotion <- nrc.scores[which.max(nrc.scores$weightage), ]
bottom.emotion <- nrc.scores[which.min(nrc.scores$weightage), ]

# as.character() prints the emotion name rather than the integer code in case
# the emotion column is a factor.
cat("An emotion with highest weightage is", as.character(top.emotion$emotion),
    "with", round(top.emotion$weightage, 2), "%\n")

cat("An emotion with lowest weightage is", as.character(bottom.emotion$emotion),
    "with", round(bottom.emotion$weightage, 2), "%\n")

# Checking the consistency of sentiment with positive and negative voted_up