Commit 93978a21 authored by Sarish's avatar Sarish
Browse files

Analysis on Q1 has been done.

parent 6480e277
Loading
Loading
Loading
Loading
+33 −24
Original line number Diff line number Diff line
@@ -8,9 +8,10 @@ install.packages("tm")
install.packages("RColorBrewer")
install.packages("ggplot2")
# Package name corrected from "dyplr": the script loads library(dplyr) below,
# and the misspelled name cannot be installed.
install.packages("dplyr")
install.packages("irr")
install.packages("lpSolve")

# Loading of packages
library(tidytext)
library(stringr)
library(syuzhet)
library(NLP)
@@ -18,11 +19,13 @@ library(tm)
library(RColorBrewer)
library(ggplot2)
library(dplyr)
library(irr)
library(lpSolve)


#loading necessary dataset
# NOTE(review): the same two .RData files are loaded from two different
# machine-specific absolute paths (D:/Hohenheim/... and C:/Users/akluj/...).
# Presumably only one pair exists on any given machine, in which case the
# other pair of load() calls fails there -- confirm which pair should remain.
load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/reviews.clean.RData")
load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData")
load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/reviews.clean.RData")
load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData")

# Reduce the cleaned reviews to the two columns the sentiment analysis uses:
# the review id and the review text.
rev.sentiment <- data.frame(
  id = reviews.clean$recommendationid,
  reviews = reviews.clean$reviews
)
@@ -30,12 +33,12 @@ rev.sentiment <- data.frame(id= reviews.clean$recommendationid, reviews = revie
# Manual sentiment analysis-----------------------------------------------------

# Loading of necessary word lists by Hu and Bing Liu, taken from Kaggle
# NOTE(review): two machine-specific setwd() calls follow (D:/Hohenheim/...
# and C:/Users/akluj/...); presumably only one of these directories exists on
# a given machine -- confirm which one should remain.
setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/00_docs")
setwd("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/00_docs")

# The relative file names below are resolved against the working directory
# set above; each file is read as a character vector with one entry per line.
positive.words <- readLines("positive-words.txt")
negative.words <- readLines("negative-words.txt")

# Function to perform sentiment analysis on a single review.sent=sentiment
# Function to perform sentiment analysis on a single review. (sent=sentiment)
sent.analysis <- function(review) {
  
  # Tokenize the review into words
@@ -101,7 +104,7 @@ percentage.identical <- mean(rev.sentiment$identical.score) * 100
# Display the result
cat("Percentage of Identical Score:", round(percentage.identical,2), "%\n")

# Sentiment analysis by using other two dictionaries from syuhet package--------
# Sentiment analysis by using other two dictionaries from syuzhet package--------

# sentiment score using AFINN dictionary
rev.sentiment$score.afinn <- get_sentiment(rev.sentiment$reviews, method = "afinn")
@@ -149,21 +152,22 @@ ggplot(nrc.scores, aes(x = emotion, y = score, fill = emotion)) +
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8, face = "bold"),  # Bolds the x-axis text
    plot.margin = margin(25, 25, 25, 25),  # right margin for legend
    legend.key.size = unit(0.5, "cm")) # size adjust for legend-key
ggsave(file= "C:/Users/hetvi/OneDrive/Desktop/Emotions.jpg", 
ggsave(file= "C:/Users/akluj/OneDrive/Desktop/Emotions.jpg", 
       width=15, height=15, units = "cm", dpi=1600)

# Calculating weightage of emotions
# Each emotion's weightage is its share of the summed scores, in percent.
nrc.scores <- nrc.scores %>%
  mutate(weightage = score / sum(score) * 100)

# Look up the extremes instead of hard-coding row positions and emotion names,
# so the messages stay correct whatever the row order of nrc.scores is.
top.row <- which.max(nrc.scores$weightage)
bottom.row <- which.min(nrc.scores$weightage)

cat("An emotion with highest weightage is", as.character(nrc.scores$emotion[top.row]),
    "with", round(nrc.scores$weightage[top.row], 2), "%\n")

cat("An emotion with lowest weightage is", as.character(nrc.scores$emotion[bottom.row]),
    "with", round(nrc.scores$weightage[bottom.row], 2), "%\n")

# Q1 Checking the consistency of sentiment with positive and negative voted_up------------------------------------------------

# Join the game review data onto the sentiment results by review id.
# all.x = TRUE keeps every row of rev.sentiment, even without a match in
# gamereviews. merge() needs both data frames as its first two arguments.
merged <- merge(rev.sentiment, gamereviews, by.x = "id", by.y = "recommendationid", all.x = TRUE)

# Keep only the columns needed to compare the sentiment label with voted_up.
comparison <- data.frame(id = merged$id, reviews = merged$reviews, sent.label=merged$sent.label, voted_up=merged$voted_up)

comparison <- comparison %>%
@@ -171,17 +175,22 @@ comparison <- comparison %>%
    voted_up==TRUE ~ "Positive",
    voted_up==FALSE ~ "Negative"))

# Flag each review as "Match" when the sentiment label equals the label mapped
# from voted_up, and "Mismatch" otherwise. These are the same labels that are
# counted below, so the counts are no longer always zero.
comparison$compare <- ifelse(comparison$sent.label == comparison$voted_up_mapped,
                             "Match", "Mismatch")

# Counting the number of matches and mismatches (rows with a missing label on
# either side give NA in `compare` and are left out of both counts)
match.count <- sum(comparison$compare == "Match", na.rm = TRUE)
mismatch.count <- sum(comparison$compare == "Mismatch", na.rm = TRUE)

# Converting 'voted_up' to a factor for better comparison
comparison$voted_up <- as.factor(comparison$voted_up)

total.count <- length(comparison$compare)

# Creating a matrix (cross-tabulation of sentiment label against voted_up)
conf_matrix <- table(comparison$sent.label, comparison$voted_up)
print(conf_matrix)

# Calculate percentage agreement.
# The agreeing rows come from the explicit match count, not diag(conf_matrix):
# the diagonal only measures agreement when row and column labels line up
# one-to-one, which fails as soon as sent.label has a third level.
total_obs <- sum(conf_matrix)
correct_agreement <- match.count
percentage_agreement <- correct_agreement / total_obs * 100


# Printing result
cat("Percentage Agreement:", percentage_agreement, "%\n")

# Q2
# Calculating the ratio of matches to the total number of rows in `comparison`
match.ratio <- match.count / total.count
# Display the result
# NOTE(review): `comparison_result` is not defined anywhere in the visible
# code -- confirm it exists before this line runs (match.ratio may be meant).
print(comparison_result)