Commit 4580251f authored by Hetvi Ariwala's avatar Hetvi Ariwala
Browse files

Q2 has been done.

parent 93978a21
Loading
Loading
Loading
Loading
+65 −26
Original line number Diff line number Diff line
# NOTE(review): this clears every object in the current workspace before the
# analysis starts; anything created earlier in the session is lost.
rm(list = ls())

# Installation of packages
# Only packages that are not installed yet are fetched, so re-running the
# script does not reinstall the whole list every time.
required.packages <- c("stringr", "syuzhet", "NLP", "tm", "RColorBrewer",
                       "ggplot2", "dplyr", "irr", "lpSolve")
missing.packages <- setdiff(required.packages, rownames(installed.packages()))
if (length(missing.packages) > 0) {
  install.packages(missing.packages)
}


# Loading of packages
library(stringr)
library(syuzhet)
library(NLP)
library(tm)
library(RColorBrewer)
library(ggplot2)
library(dplyr)
library(irr)
library(lpSolve)
library(irr)



# Loading the necessary datasets
# The objects `reviews.clean` and `gamereviews` used further down are
# presumably restored by these load() calls - confirm against the .RData files.
# NOTE(review): the paths are absolute and machine-specific (one set under
# C:/Users/akluj, one under D:/Hohenheim). load() stops with an error when a
# file cannot be opened, so on any single machine only the matching lines can
# run; consider a project-relative path instead.
load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/reviews.clean.RData")
load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData")
load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData")
# NOTE(review): this file carries a .csv suffix and lives in the assignment 1
# repository; load() only reads files written by save() - confirm the format.
load("D:/Hohenheim/SEM 3/ADS/introads_ass1_team18-main/01_data/raw/revsummary.RData.csv")
load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/reviews.clean.RData")


# Only keeping the columns necessary for the analysis: the review id
# (recommendationid) and the review text
rev.sentiment <- data.frame(id= reviews.clean$recommendationid,  reviews = reviews.clean$reviews)
@@ -33,7 +35,7 @@ rev.sentiment <- data.frame(id= reviews.clean$recommendationid, reviews = revie
# Manual sentiment analysis-----------------------------------------------------

# Loading the necessary word lists by Hu and Bing Liu, taken from Kaggle
# NOTE(review): both setwd() calls use absolute, machine-specific directories;
# setwd() errors when the directory does not exist, so only the line matching
# the current machine can run. The readLines() calls below resolve their file
# names relative to whichever working directory was set last.
setwd("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/00_docs")
setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/00_docs")

# One entry per line of each word-list file
positive.words <- readLines("positive-words.txt")
negative.words <- readLines("negative-words.txt")
@@ -116,24 +118,25 @@ rev.sentiment$label.afinn <- ifelse(rev.sentiment$score.afinn >= 1, "Positive",
# Sentiment score using nrc dictionary
# The result of get_nrc_sentiment() is stored as-is in the score.nrc column
rev.sentiment$score.nrc <- get_nrc_sentiment(rev.sentiment$reviews)


# Emotion analysis---------------------------------------------------

# Extracting the NRC emotion scores computed above
nrc.scores <- rev.sentiment$score.nrc

# Summing up the NRC scores for each emotion across all reviews
# (one total per column, ignoring missing values)
nrc.scores <- colSums(nrc.scores, na.rm = TRUE)

# Converting the aggregated scores to a data frame with one row per emotion
nrc.scores <- data.frame(emotion = names(nrc.scores), score = nrc.scores)

# Ordering the data frame by scores in descending order
nrc.scores <- nrc.scores[order(-nrc.scores$score),]

# Defining a custom color palette of ten hard-coded hex colors
# NOTE(review): the values are written out by hand, not generated with
# RColorBrewer; the palette is not used in the lines visible here.
custom_palette <- c("#a1d99b", "#f03b20", "#FFC3A0", "#FF677D", "#D4A5A5", "#fec44f", "#e6550d", "#fc9272", "#9ecae1", "#bdbdbd")

# Reordering the factor levels of emotion to follow the current (score-sorted)
# row order - presumably so that plots keep the emotions in that order
nrc.scores$emotion <- factor(nrc.scores$emotion, levels = nrc.scores$emotion)

# Bar plotting of emotions with values
@@ -165,32 +168,68 @@ cat("An emotion with lowest weightage is disgust with", round(nrc.scores$weighta

# Q1 Checking the consistency of sentiment with positive and negative voted_up------------------------------------------------

# Attaching the raw review information from gamereviews to every scored review;
# all.x = TRUE keeps scored reviews that have no match in gamereviews.
rev.merged <- merge(rev.sentiment, gamereviews, by.x = "id", by.y = "recommendationid", all.x = TRUE)

# Another data frame used specifically to check the sentiment consistency
sent.compare <- data.frame(id = rev.merged$id, reviews = rev.merged$reviews,
                           sent.label = rev.merged$sent.label, voted_up = rev.merged$voted_up)

# Translating the recommendation flag into sentiment vocabulary so that it can
# be compared with sent.label directly
sent.compare <- sent.compare %>%
  mutate(voted_up_mapped = case_when(
    voted_up == TRUE ~ "Positive",
    voted_up == FALSE ~ "Negative"))


# Converting 'voted_up' to a factor for better comparison
sent.compare$voted_up <- as.factor(sent.compare$voted_up)

# Creating a confusion matrix
# Both dimensions share one set of levels, so the matrix is square and its
# diagonal holds exactly the reviews where sentiment and vote agree. Any extra
# sentiment label (e.g. a neutral one) gets its own row and an all-zero column.
# NOTE(review): assumes sent.label spells its classes "Positive"/"Negative",
# like the other label columns in this script - confirm.
label.levels <- union(c("Negative", "Positive"),
                      sort(unique(as.character(sent.compare$sent.label))))
conf.matrix <- table(
  sentiment = factor(sent.compare$sent.label, levels = label.levels),
  voted_up = factor(sent.compare$voted_up_mapped, levels = label.levels)
)
print(conf.matrix)

# Calculating the percentage agreement
# Rows with a missing label or vote are dropped by table() and therefore do
# not count towards the total.
total.obs <- sum(conf.matrix)
correct.agreement <- sum(diag(conf.matrix))
percentage.agreement <- if (total.obs > 0) {
  correct.agreement / total.obs * 100
} else {
  NA_real_
}


# Printing result
cat("Percentage Agreement:", percentage.agreement, "%\n")

# Q2

# Adding another column of votes_up to check on the helpfulness of the review
# (sent.compare was built column by column from rev.merged, so rows line up)
sent.compare$votes_up <- rev.merged$votes_up

# Creating a table of counts for each sentiment label
sent.count <- table(sent.compare$sent.label)

# Displaying the counts
print(sent.count)

# Calculating the average helpfulness for each sentiment label,
# ignoring reviews without a votes_up value
avg.sent <- tapply(
  sent.compare$votes_up,
  sent.compare$sent.label,
  mean,
  na.rm = TRUE
)

# Print results
print(avg.sent)

# Finding the index of the review with the highest helpful votes
# (which.max() returns the first maximum, or integer(0) if every value is NA)
max.help.index <- which.max(sent.compare$votes_up)

# Getting the corresponding review (zero rows when no maximum exists)
max.helpfulness.review <- sent.compare[max.help.index, ]

# Printing a message for the highest helpfulness review
if (length(max.help.index) == 0) {
  cat("No review has a recorded number of helpful votes.\n")
} else {
  cat("The review with the highest helpfulness has", max.helpfulness.review$sent.label,
      "sentiment and its number of helpful votes is", max.helpfulness.review$votes_up,
      "which is at row", max.help.index, "\n")
}

# I cross-checked this review in the rev.merged data frame: its sentiment
# label is neutral under every type of sentiment analysis used.

# Q3 
 No newline at end of file