Loading 02_code/R/Script 2 - Sentiment analysis.R +65 −26 Original line number Diff line number Diff line rm(list = ls()) # Installation of packages install.packages("stringr") install.packages("syuzhet") install.packages("NLP") install.packages("tm") install.packages("RColorBrewer") install.packages("ggplot2") install.packages("dyplr") install.packages("irr") install.packages("lpSolve") install.packages("irr") # Loading of packages library(stringr) library(syuzhet) library(NLP) library(tm) library(RColorBrewer) library(ggplot2) library(dplyr) library(irr) library(lpSolve) library(irr) #loading necessary dataset load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/reviews.clean.RData") load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") load("D:/Hohenheim/SEM 3/ADS/introads_ass1_team18-main/01_data/raw/revsummary.RData.csv") load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/reviews.clean.RData") # Only keeping necessary columns for analysis rev.sentiment <- data.frame(id= reviews.clean$recommendationid, reviews = reviews.clean$reviews) Loading @@ -33,7 +35,7 @@ rev.sentiment <- data.frame(id= reviews.clean$recommendationid, reviews = revie # Manual sentiment analysis----------------------------------------------------- # Loading of necessary words lists taken from Kaggel by Hu and Bing Liu setwd("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/00_docs") setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/00_docs") positive.words <- readLines("positive-words.txt") negative.words <- readLines("negative-words.txt") Loading Loading @@ -116,24 +118,25 @@ rev.sentiment$label.afinn <- ifelse(rev.sentiment$score.afinn >= 1, "Positive", # Sentiment score using nrc dictionary rev.sentiment$score.nrc <- get_nrc_sentiment(rev.sentiment$reviews) # Emotion analysis--------------------------------------------------- # Extracting the NRC emotion scores for each term nrc.scores <- rev.sentiment$score.nrc #Sum the NRC scores for each emotion across all reviews # Suming up the NRC scores for each emotion across all reviews nrc.scores <- colSums(nrc.scores, na.rm = TRUE) # Convert the aggregated scores to a data frame # Converting the aggregated scores to a data frame nrc.scores <- data.frame(emotion = names(nrc.scores), score = nrc.scores) # Order the dataframe by scores in descending order # Ordering the dataframe by scores in descending order nrc.scores <- nrc.scores[order(-nrc.scores$score),] # Define a custom color palette with light colors # Defining a custom color palette with light colors using RColorBrewer package custom_palette <- c("#a1d99b", "#f03b20", "#FFC3A0", "#FF677D", "#D4A5A5", "#fec44f", "#e6550d", "#fc9272", "#9ecae1", "#bdbdbd") # Reorder the factor levels of emotion based on scores # Reordering the factor levels of emotion based on scores nrc.scores$emotion <- factor(nrc.scores$emotion, levels = nrc.scores$emotion) # Bar plotting of emotions with values Loading Loading @@ -165,32 +168,68 @@ cat("An emotion with lowest weightage is disgust with", round(nrc.scores$weighta # Q1 Checking the consistency of sentiment with positive and negative voted_up------------------------------------------------ merged <- merge(rev.sentiment, gamereviews, by.x = "id", by.y = "recommendationid", all.x = TRUE) rev.merged <- merge(rev.sentiment, gamereviews, by.x = "id", by.y = "recommendationid", all.x = TRUE) comparison <- data.frame(id = merged$id, reviews = merged$reviews, sent.label=merged$sent.label, voted_up=merged$voted_up) # ANother datafarme to particularly check on sentiment consistency sent.compare <- data.frame(id = rev.merged$id, reviews = rev.merged$reviews, sent.label=rev.merged$sent.label, voted_up=rev.merged$voted_up) comparison <- comparison %>% sent.compare <- sent.compare %>% mutate(voted_up_mapped = case_when( voted_up==TRUE ~ "Positive", voted_up==FALSE ~ "Negative")) # Converting 'voted_up' to a factor for better comparison comparison$voted_up <- as.factor(comparison$voted_up) sent.compare$voted_up <- as.factor(sent.compare$voted_up) # Creating a matrix conf_matrix <- table(comparison$sent.label, comparison$voted_up) print(conf_matrix) # Creating a confusion matrix matrix conf.matrix <- table(sent.compare$sent.label, sent.compare$voted_up) print(conf.matrix) # Calculate percentage agreement total_obs <- sum(conf_matrix) correct_agreement <- sum(diag(conf_matrix)) percentage_agreement <- correct_agreement / total_obs * 100 # Calculating a percentage agreement total.obs <- sum(conf.matrix) correct.agreement <- sum(diag(conf.matrix)) percentage.agreement <- correct.agreement / total.obs * 100 # Printing result cat("Percentage Agreement:", percentage_agreement, "%\n") cat("Percentage Agreement:", percentage.agreement, "%\n") # Q2 # Creating a table of counts for each sentiment label # Adding another column of votes_up to check on the helpfulness of the review sent.compare$votes_up <- rev.merged$votes_up sent.count <- table(sent.compare$sent.label) # Displaying the counts print(sent.count) # Calculating an average helpfulness for each sentiment label avg.sent <- tapply( sent.compare$votes_up, sent.compare$sent.label, mean, na.rm = TRUE ) # Print results print(avg.sent) # Finding the index of the review with the highest helpful votes max.help.index <- which.max(sent.compare$votes_up) # Getting the corresponding review max.helpfulness.review <- sent.compare[max.help.index, ] # Printing a message for highest helpfulness review cat("A review with the highest helpfulness is", max.helpfulness.review$sent.label, "sentiment and its' helpfulness rank is", max.helpfulness.review$votes_up, "which is at",max.help.index) # I crossed check the review in rev.merged dataframe and I found that its sentiment label with # all types of sentiment analysis and it's sentiment is being neutral in all. # Q3 No newline at end of file Loading
02_code/R/Script 2 - Sentiment analysis.R +65 −26 Original line number Diff line number Diff line rm(list = ls()) # Installation of packages install.packages("stringr") install.packages("syuzhet") install.packages("NLP") install.packages("tm") install.packages("RColorBrewer") install.packages("ggplot2") install.packages("dyplr") install.packages("irr") install.packages("lpSolve") install.packages("irr") # Loading of packages library(stringr) library(syuzhet) library(NLP) library(tm) library(RColorBrewer) library(ggplot2) library(dplyr) library(irr) library(lpSolve) library(irr) #loading necessary dataset load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/reviews.clean.RData") load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") load("D:/Hohenheim/SEM 3/ADS/introads_ass1_team18-main/01_data/raw/revsummary.RData.csv") load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/reviews.clean.RData") # Only keeping necessary columns for analysis rev.sentiment <- data.frame(id= reviews.clean$recommendationid, reviews = reviews.clean$reviews) Loading @@ -33,7 +35,7 @@ rev.sentiment <- data.frame(id= reviews.clean$recommendationid, reviews = revie # Manual sentiment analysis----------------------------------------------------- # Loading of necessary words lists taken from Kaggel by Hu and Bing Liu setwd("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/00_docs") setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/00_docs") positive.words <- readLines("positive-words.txt") negative.words <- readLines("negative-words.txt") Loading Loading @@ -116,24 +118,25 @@ rev.sentiment$label.afinn <- ifelse(rev.sentiment$score.afinn >= 1, "Positive", # Sentiment score using nrc dictionary rev.sentiment$score.nrc <- get_nrc_sentiment(rev.sentiment$reviews) # Emotion analysis--------------------------------------------------- # Extracting the NRC emotion scores for each term nrc.scores <- rev.sentiment$score.nrc #Sum the NRC scores for each emotion across all reviews # Suming up the NRC scores for each emotion across all reviews nrc.scores <- colSums(nrc.scores, na.rm = TRUE) # Convert the aggregated scores to a data frame # Converting the aggregated scores to a data frame nrc.scores <- data.frame(emotion = names(nrc.scores), score = nrc.scores) # Order the dataframe by scores in descending order # Ordering the dataframe by scores in descending order nrc.scores <- nrc.scores[order(-nrc.scores$score),] # Define a custom color palette with light colors # Defining a custom color palette with light colors using RColorBrewer package custom_palette <- c("#a1d99b", "#f03b20", "#FFC3A0", "#FF677D", "#D4A5A5", "#fec44f", "#e6550d", "#fc9272", "#9ecae1", "#bdbdbd") # Reorder the factor levels of emotion based on scores # Reordering the factor levels of emotion based on scores nrc.scores$emotion <- factor(nrc.scores$emotion, levels = nrc.scores$emotion) # Bar plotting of emotions with values Loading Loading @@ -165,32 +168,68 @@ cat("An emotion with lowest weightage is disgust with", round(nrc.scores$weighta # Q1 Checking the consistency of sentiment with positive and negative voted_up------------------------------------------------ merged <- merge(rev.sentiment, gamereviews, by.x = "id", by.y = "recommendationid", all.x = TRUE) rev.merged <- merge(rev.sentiment, gamereviews, by.x = "id", by.y = "recommendationid", all.x = TRUE) comparison <- data.frame(id = merged$id, reviews = merged$reviews, sent.label=merged$sent.label, voted_up=merged$voted_up) # ANother datafarme to particularly check on sentiment consistency sent.compare <- data.frame(id = rev.merged$id, reviews = rev.merged$reviews, sent.label=rev.merged$sent.label, voted_up=rev.merged$voted_up) comparison <- comparison %>% sent.compare <- sent.compare %>% mutate(voted_up_mapped = case_when( voted_up==TRUE ~ "Positive", voted_up==FALSE ~ "Negative")) # Converting 'voted_up' to a factor for better comparison comparison$voted_up <- as.factor(comparison$voted_up) sent.compare$voted_up <- as.factor(sent.compare$voted_up) # Creating a matrix conf_matrix <- table(comparison$sent.label, comparison$voted_up) print(conf_matrix) # Creating a confusion matrix matrix conf.matrix <- table(sent.compare$sent.label, sent.compare$voted_up) print(conf.matrix) # Calculate percentage agreement total_obs <- sum(conf_matrix) correct_agreement <- sum(diag(conf_matrix)) percentage_agreement <- correct_agreement / total_obs * 100 # Calculating a percentage agreement total.obs <- sum(conf.matrix) correct.agreement <- sum(diag(conf.matrix)) percentage.agreement <- correct.agreement / total.obs * 100 # Printing result cat("Percentage Agreement:", percentage_agreement, "%\n") cat("Percentage Agreement:", percentage.agreement, "%\n") # Q2 # Creating a table of counts for each sentiment label # Adding another column of votes_up to check on the helpfulness of the review sent.compare$votes_up <- rev.merged$votes_up sent.count <- table(sent.compare$sent.label) # Displaying the counts print(sent.count) # Calculating an average helpfulness for each sentiment label avg.sent <- tapply( sent.compare$votes_up, sent.compare$sent.label, mean, na.rm = TRUE ) # Print results print(avg.sent) # Finding the index of the review with the highest helpful votes max.help.index <- which.max(sent.compare$votes_up) # Getting the corresponding review max.helpfulness.review <- sent.compare[max.help.index, ] # Printing a message for highest helpfulness review cat("A review with the highest helpfulness is", max.helpfulness.review$sent.label, "sentiment and its' helpfulness rank is", max.helpfulness.review$votes_up, "which is at",max.help.index) # I crossed check the review in rev.merged dataframe and I found that its sentiment label with # all types of sentiment analysis and it's sentiment is being neutral in all. # Q3 No newline at end of file