Loading 02_code/R/Script 2 - Sentiment analysis.R +33 −24 Original line number Diff line number Diff line Loading @@ -8,9 +8,10 @@ install.packages("tm") install.packages("RColorBrewer") install.packages("ggplot2") install.packages("dyplr") install.packages("irr") install.packages("lpSolve") # Loading of packages library(tidytext) library(stringr) library(syuzhet) library(NLP) Loading @@ -18,11 +19,13 @@ library(tm) library(RColorBrewer) library(ggplot2) library(dplyr) library(irr) library(lpSolve) #loading necessary dataset load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/reviews.clean.RData") load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/reviews.clean.RData") load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") # Only keeping necessary columns for analysis rev.sentiment <- data.frame(id= reviews.clean$recommendationid, reviews = reviews.clean$reviews) Loading @@ -30,12 +33,12 @@ rev.sentiment <- data.frame(id= reviews.clean$recommendationid, reviews = revie # Manual sentiment analysis----------------------------------------------------- # Loading of necessary words lists taken from Kaggel by Hu and Bing Liu setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/00_docs") setwd("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/00_docs") positive.words <- readLines("positive-words.txt") negative.words <- readLines("negative-words.txt") # Function to perform sentiment analysis on a single review.sent=sentiment # Function to perform sentiment analysis on a single review. (sent=sentiment) sent.analysis <- function(review) { # Tokenize the review into words Loading Loading @@ -101,7 +104,7 @@ percentage.identical <- mean(rev.sentiment$identical.score) * 100 # Display the result cat("Percentage of Identical Score:", round(percentage.identical,2), "%\n") # Sentiment analysis by using other two dictionaries from syuhet package-------- # Sentiment analysis by using other two dictionaries from syuzhet package-------- # sentiment score using AFINN dictionary rev.sentiment$score.afinn <- get_sentiment(rev.sentiment$reviews, method = "afinn") Loading Loading @@ -149,21 +152,22 @@ ggplot(nrc.scores, aes(x = emotion, y = score, fill = emotion)) + axis.text.x = element_text(angle = 45, hjust = 1, size = 8, face = "bold"), # Bolds the x-axis text plot.margin = margin(25, 25, 25, 25), # right margin for legend legend.key.size = unit(0.5, "cm")) # size adjust for legend-key ggsave(file= "C:/Users/hetvi/OneDrive/Desktop/Emotions.jpg", ggsave(file= "C:/Users/akluj/OneDrive/Desktop/Emotions.jpg", width=15, height=15, units = "cm", dpi=1600) # Calculating weightage of enotions # Calculating weightage of emotions nrc.scores <- nrc.scores %>% mutate(weightage = score / sum(score) *100) cat("An emotion with highest weightage is positive with", round(nrc.scores$weightage[1],2),"%\n") cat("An emotion with highest weightage is disgust with", round(nrc.scores$weightage[10],2),"%\n") cat("An emotion with lowest weightage is disgust with", round(nrc.scores$weightage[10],2),"%\n") # Q1 Checking the consistency of sentiment with positive and negative voted_up------------------------------------------------ merged <- merge(rev.sentiment, gamereviews, by.x = "id", by.y = "recommendationid", all.x = TRUE) # Checking the consistency of sentiment with positive and negative voted_up merged <- merge(sent.label= rev.sentiment$sent.label, voted_up= gamereviews$voted_up, by.x = "id", by.y = "recommendationid", all.x = TRUE) comparison <- data.frame(id = merged$id, reviews = merged$reviews, sent.label=merged$sent.label, voted_up=merged$voted_up) comparison <- comparison %>% Loading @@ -171,17 +175,22 @@ comparison <- comparison %>% voted_up==TRUE ~ "Positive", voted_up==FALSE ~ "Negative")) comparison$compare <- ifelse(comparison$sent.label == "Positive" & comparison$voted_up_mapped == "Positive", "positive", ifelse(comparison$sent.label == "Negative" & comparison$voted_up_mapped == "Negative", "positive", "Neutral")) # Counting the number of match match.count <- sum(comparison$compare=="Match") mismatch.count <- sum(comparison$compare=="Mismatch") # Converting 'voted_up' to a factor for better comparison comparison$voted_up <- as.factor(comparison$voted_up) total.count <- length(comparison$compare) # Creating a matrix conf_matrix <- table(comparison$sent.label, comparison$voted_up) print(conf_matrix) # Calculate percentage agreement total_obs <- sum(conf_matrix) correct_agreement <- sum(diag(conf_matrix)) percentage_agreement <- correct_agreement / total_obs * 100 # Printing result cat("Percentage Agreement:", percentage_agreement, "%\n") # Q2 # Calculating ration of match and mismatch match.ratio <- match.count / total.count # Display the result print(comparison_result) Loading
02_code/R/Script 2 - Sentiment analysis.R +33 −24 Original line number Diff line number Diff line Loading @@ -8,9 +8,10 @@ install.packages("tm") install.packages("RColorBrewer") install.packages("ggplot2") install.packages("dyplr") install.packages("irr") install.packages("lpSolve") # Loading of packages library(tidytext) library(stringr) library(syuzhet) library(NLP) Loading @@ -18,11 +19,13 @@ library(tm) library(RColorBrewer) library(ggplot2) library(dplyr) library(irr) library(lpSolve) #loading necessary dataset load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/reviews.clean.RData") load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/reviews.clean.RData") load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") # Only keeping necessary columns for analysis rev.sentiment <- data.frame(id= reviews.clean$recommendationid, reviews = reviews.clean$reviews) Loading @@ -30,12 +33,12 @@ rev.sentiment <- data.frame(id= reviews.clean$recommendationid, reviews = revie # Manual sentiment analysis----------------------------------------------------- # Loading of necessary words lists taken from Kaggel by Hu and Bing Liu setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/00_docs") setwd("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/00_docs") positive.words <- readLines("positive-words.txt") negative.words <- readLines("negative-words.txt") # Function to perform sentiment analysis on a single review.sent=sentiment # Function to perform sentiment analysis on a single review. (sent=sentiment) sent.analysis <- function(review) { # Tokenize the review into words Loading Loading @@ -101,7 +104,7 @@ percentage.identical <- mean(rev.sentiment$identical.score) * 100 # Display the result cat("Percentage of Identical Score:", round(percentage.identical,2), "%\n") # Sentiment analysis by using other two dictionaries from syuhet package-------- # Sentiment analysis by using other two dictionaries from syuzhet package-------- # sentiment score using AFINN dictionary rev.sentiment$score.afinn <- get_sentiment(rev.sentiment$reviews, method = "afinn") Loading Loading @@ -149,21 +152,22 @@ ggplot(nrc.scores, aes(x = emotion, y = score, fill = emotion)) + axis.text.x = element_text(angle = 45, hjust = 1, size = 8, face = "bold"), # Bolds the x-axis text plot.margin = margin(25, 25, 25, 25), # right margin for legend legend.key.size = unit(0.5, "cm")) # size adjust for legend-key ggsave(file= "C:/Users/hetvi/OneDrive/Desktop/Emotions.jpg", ggsave(file= "C:/Users/akluj/OneDrive/Desktop/Emotions.jpg", width=15, height=15, units = "cm", dpi=1600) # Calculating weightage of enotions # Calculating weightage of emotions nrc.scores <- nrc.scores %>% mutate(weightage = score / sum(score) *100) cat("An emotion with highest weightage is positive with", round(nrc.scores$weightage[1],2),"%\n") cat("An emotion with highest weightage is disgust with", round(nrc.scores$weightage[10],2),"%\n") cat("An emotion with lowest weightage is disgust with", round(nrc.scores$weightage[10],2),"%\n") # Q1 Checking the consistency of sentiment with positive and negative voted_up------------------------------------------------ merged <- merge(rev.sentiment, gamereviews, by.x = "id", by.y = "recommendationid", all.x = TRUE) # Checking the consistency of sentiment with positive and negative voted_up merged <- merge(sent.label= rev.sentiment$sent.label, voted_up= gamereviews$voted_up, by.x = "id", by.y = "recommendationid", all.x = TRUE) comparison <- data.frame(id = merged$id, reviews = merged$reviews, sent.label=merged$sent.label, voted_up=merged$voted_up) comparison <- comparison %>% Loading @@ -171,17 +175,22 @@ comparison <- comparison %>% voted_up==TRUE ~ "Positive", voted_up==FALSE ~ "Negative")) comparison$compare <- ifelse(comparison$sent.label == "Positive" & comparison$voted_up_mapped == "Positive", "positive", ifelse(comparison$sent.label == "Negative" & comparison$voted_up_mapped == "Negative", "positive", "Neutral")) # Counting the number of match match.count <- sum(comparison$compare=="Match") mismatch.count <- sum(comparison$compare=="Mismatch") # Converting 'voted_up' to a factor for better comparison comparison$voted_up <- as.factor(comparison$voted_up) total.count <- length(comparison$compare) # Creating a matrix conf_matrix <- table(comparison$sent.label, comparison$voted_up) print(conf_matrix) # Calculate percentage agreement total_obs <- sum(conf_matrix) correct_agreement <- sum(diag(conf_matrix)) percentage_agreement <- correct_agreement / total_obs * 100 # Printing result cat("Percentage Agreement:", percentage_agreement, "%\n") # Q2 # Calculating ration of match and mismatch match.ratio <- match.count / total.count # Display the result print(comparison_result)