Loading 02_code/R/Script 2 - Sentiment analysis.R +20 −10 Original line number Diff line number Diff line Loading @@ -19,6 +19,7 @@ if(!require("dyplr")) install.packages("dyplr") if(!require("lpSolve")) install.packages("lpSolve") if(!require("irr")) install.packages("irr") if(!require("lubridate")) install.packages("lubridate") if(!require("xtable")) install.packages("xtable") # Loading of packages library(syuzhet) Loading @@ -27,13 +28,15 @@ library(tm) library(RColorBrewer) library(ggplot2) library(dplyr) library(lpsolve) library(irr) library(lubridate) library(xtable) #loading necessary dataset load("~/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") load("~/ADS/introads_ass2_team18/01_data/raw/rev.summary.RData") load("~/ADS/introads_ass2_team18/01_data/reviews.clean.RData") load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/rev.summary.RData") load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/reviews.clean.RData") # Only keeping necessary columns for analysis Loading @@ -43,7 +46,7 @@ rev.sentiment <- data.frame(id= reviews.clean$recommendationid, # Manual sentiment analysis----------------------------------------------------- # Loading of necessary words lists taken from Kaggel by Hu and Bing Liu setwd("~/ADS/introads_ass2_team18/00_docs") setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/00_docs") positive.words <- readLines("positive-words.txt") negative.words <- readLines("negative-words.txt") Loading Loading @@ -129,16 +132,14 @@ rev.sentiment$label.afinn <- ifelse(rev.sentiment$score.afinn >= 1, "Positive", # Sentiment score using nrc dictionary rev.sentiment$score.nrc <- get_nrc_sentiment(rev.sentiment$reviews) # calculating coorelation between the results of dictionaries correlation_matrix <- cor(rev.sentiment[c("score.manual", "score.bing", "score.afinn")]) # Creating LaTeX-formatted table latex_table <- xtable(correlation_matrix, caption = "Correlation Matrix") # Printing the LaTeX code print(latex_table, include.rownames = TRUE) save(rev.sentiment,file = "~/ADS/introads_ass2_team18/01_data/rev.sentiment.RData") save(rev.sentiment,file = "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/rev.sentiment.RData") # Emotion analysis--------------------------------------------------- # Extracting the NRC emotion scores for each term Loading Loading @@ -175,7 +176,7 @@ ggplot(nrc.scores, aes(x = emotion, y = score, fill = emotion)) + axis.text.x = element_text(angle = 45, hjust = 1, size = 8, face = "bold"), # Bolds the x-axis text plot.margin = margin(25, 25, 25, 25), # right margin for legend legend.key.size = unit(0.5, "cm")) # size adjust for legend-key ggsave(file = "~/ADS/introads_ass2_team18/03_report/graphs/Emotions.jpg", width = 15, ggsave(file = "D:Hohenheim/SEM 3/ADS/introads_ass2_team18/03_report/graphs/Emotions.jpg", width = 15, height = 15, units = "cm", dpi = 1600) Loading @@ -197,6 +198,9 @@ cat("An emotion with lowest weightage is disgust with", rev.merged <- merge(rev.sentiment, gamereviews, by.x = "id", by.y = "recommendationid", all.x = TRUE) # Saving the dataset, as its a primary dataset to perform further analysis. save(rev.merged,file = "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/rev.merged.RData") # Due to the length of the 'rev.merged' dataframe, a new dataframe was derived to #facilitate efficient analysis. This new dataframe selectively extracts essential columns #and information, streamlining the analysis process. Loading Loading @@ -243,6 +247,8 @@ cat("A Percentage of the relevance between manual sentiment score and # Adding another column of votes_up to check on the helpfulness of the review sent.compare$votes_up <- rev.merged$votes_up # Counting the number of sentiments sent.count <- table(sent.compare$label.manual) # Displaying the counts Loading Loading @@ -298,6 +304,10 @@ sent.compare$date <- as.Date(sent.compare$date) # adding a column of sent score from rev.merged into sent.compare sent.compare$score.manual <- rev.merged$score.manual # Saving the dataset, as its has been extracted from rev.merged datframe which is too large. # Hence, sent.compare dataframe made for easiness. save(sent.compare,file = "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/sent.compare.RData") # Aggregating total sentiment scores by date total.score.date <- tapply(sent.compare$score.manual, sent.compare$date, sum) Loading @@ -312,7 +322,7 @@ ggplot(data = total.score.date) + labs(x = "Date", y = "Total Sentiment Score") + theme_minimal() ggsave(file= "~/ADS/introads_ass2_team18/03_report/graphs/Sentiment variation.jpg", ggsave(file= "D:Hohenheim/SEM 3/ADS/introads_ass2_team18/03_report/graphs/Sentiment variation.jpg", width=15, height=15, units = "cm", dpi=1600) # Finding the index of the review with the highest helpful votes Loading Loading
02_code/R/Script 2 - Sentiment analysis.R +20 −10 Original line number Diff line number Diff line Loading @@ -19,6 +19,7 @@ if(!require("dyplr")) install.packages("dyplr") if(!require("lpSolve")) install.packages("lpSolve") if(!require("irr")) install.packages("irr") if(!require("lubridate")) install.packages("lubridate") if(!require("xtable")) install.packages("xtable") # Loading of packages library(syuzhet) Loading @@ -27,13 +28,15 @@ library(tm) library(RColorBrewer) library(ggplot2) library(dplyr) library(lpsolve) library(irr) library(lubridate) library(xtable) #loading necessary dataset load("~/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") load("~/ADS/introads_ass2_team18/01_data/raw/rev.summary.RData") load("~/ADS/introads_ass2_team18/01_data/reviews.clean.RData") load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/rev.summary.RData") load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/reviews.clean.RData") # Only keeping necessary columns for analysis Loading @@ -43,7 +46,7 @@ rev.sentiment <- data.frame(id= reviews.clean$recommendationid, # Manual sentiment analysis----------------------------------------------------- # Loading of necessary words lists taken from Kaggel by Hu and Bing Liu setwd("~/ADS/introads_ass2_team18/00_docs") setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/00_docs") positive.words <- readLines("positive-words.txt") negative.words <- readLines("negative-words.txt") Loading Loading @@ -129,16 +132,14 @@ rev.sentiment$label.afinn <- ifelse(rev.sentiment$score.afinn >= 1, "Positive", # Sentiment score using nrc dictionary rev.sentiment$score.nrc <- get_nrc_sentiment(rev.sentiment$reviews) # calculating coorelation between the results of dictionaries correlation_matrix <- cor(rev.sentiment[c("score.manual", "score.bing", "score.afinn")]) # Creating LaTeX-formatted table latex_table <- xtable(correlation_matrix, caption = "Correlation Matrix") # Printing the LaTeX code print(latex_table, include.rownames = TRUE) save(rev.sentiment,file = "~/ADS/introads_ass2_team18/01_data/rev.sentiment.RData") save(rev.sentiment,file = "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/rev.sentiment.RData") # Emotion analysis--------------------------------------------------- # Extracting the NRC emotion scores for each term Loading Loading @@ -175,7 +176,7 @@ ggplot(nrc.scores, aes(x = emotion, y = score, fill = emotion)) + axis.text.x = element_text(angle = 45, hjust = 1, size = 8, face = "bold"), # Bolds the x-axis text plot.margin = margin(25, 25, 25, 25), # right margin for legend legend.key.size = unit(0.5, "cm")) # size adjust for legend-key ggsave(file = "~/ADS/introads_ass2_team18/03_report/graphs/Emotions.jpg", width = 15, ggsave(file = "D:Hohenheim/SEM 3/ADS/introads_ass2_team18/03_report/graphs/Emotions.jpg", width = 15, height = 15, units = "cm", dpi = 1600) Loading @@ -197,6 +198,9 @@ cat("An emotion with lowest weightage is disgust with", rev.merged <- merge(rev.sentiment, gamereviews, by.x = "id", by.y = "recommendationid", all.x = TRUE) # Saving the dataset, as its a primary dataset to perform further analysis. save(rev.merged,file = "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/rev.merged.RData") # Due to the length of the 'rev.merged' dataframe, a new dataframe was derived to #facilitate efficient analysis. This new dataframe selectively extracts essential columns #and information, streamlining the analysis process. Loading Loading @@ -243,6 +247,8 @@ cat("A Percentage of the relevance between manual sentiment score and # Adding another column of votes_up to check on the helpfulness of the review sent.compare$votes_up <- rev.merged$votes_up # Counting the number of sentiments sent.count <- table(sent.compare$label.manual) # Displaying the counts Loading Loading @@ -298,6 +304,10 @@ sent.compare$date <- as.Date(sent.compare$date) # adding a column of sent score from rev.merged into sent.compare sent.compare$score.manual <- rev.merged$score.manual # Saving the dataset, as its has been extracted from rev.merged datframe which is too large. # Hence, sent.compare dataframe made for easiness. save(sent.compare,file = "D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/sent.compare.RData") # Aggregating total sentiment scores by date total.score.date <- tapply(sent.compare$score.manual, sent.compare$date, sum) Loading @@ -312,7 +322,7 @@ ggplot(data = total.score.date) + labs(x = "Date", y = "Total Sentiment Score") + theme_minimal() ggsave(file= "~/ADS/introads_ass2_team18/03_report/graphs/Sentiment variation.jpg", ggsave(file= "D:Hohenheim/SEM 3/ADS/introads_ass2_team18/03_report/graphs/Sentiment variation.jpg", width=15, height=15, units = "cm", dpi=1600) # Finding the index of the review with the highest helpful votes Loading