Loading 02_code/R/Script 2 - Sentiment analysis.R +10 −12 Original line number Diff line number Diff line Loading @@ -27,9 +27,9 @@ library(stats) library(lubridate) #loading necessary dataset load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/rev.summary.RData") load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/reviews.clean.RData") load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/raw/rev.summary.RData") load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/reviews.clean.RData") # Only keeping necessary columns for analysis Loading @@ -38,7 +38,7 @@ rev.sentiment <- data.frame(id= reviews.clean$recommendationid, reviews = revie # Manual sentiment analysis----------------------------------------------------- # Loading of necessary words lists taken from Kaggel by Hu and Bing Liu setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/00_docs") setwd("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/00_docs") positive.words <- readLines("positive-words.txt") negative.words <- readLines("negative-words.txt") Loading Loading @@ -245,12 +245,13 @@ print(avg.sent.score) # A review score based from revsummary file, it was dowanloaded in the 1st assisgnement overall.review.score <- revsummary$review_score print(overall.review.score) # Calculating correlation betwwen an average sentiment score and overall review score correlation <- cor(avg.sent.score, overall.review.score) # Calculating correlation between an average sentiment score and overall review score # correlation <- cor(avg.sent.score, overall.review.score) # Printing the correlation coefficient print(paste("Correlation between average sentiment and overall score:", correlation)) # print(paste("Correlation between average sentiment and overall score:", correlation)) # Q4 Sentiment variation ------------------------------------------------------- Loading Loading @@ -281,15 +282,12 @@ ggplot(data = data.frame(date = as.Date(names(total.score.date)), ggsave(file= "C:/Users/hetvi/OneDrive/Desktop/Sentiment variation.jpg", width=15, height=15, units = "cm", dpi=1600) # checking on highest sentiment date high.index <- which.ma # Finding the index of the review with the highest helpful votes high.index <- which.max(total.score.date$total_sentiment) # Getting the corresponding review # Getting the corresponding date high.count <- total.score.date[high.index,] print(high.count) # Printing a result cat("A highest sentiment score", high.count$total_sentiment, "was recorded on", format(high.count$date, "%Y-%m-%d")) cat("A highest sentiment score of", high.count$total_sentiment, "was recorded on", format(high.count$date, "%Y-%m-%d")) Loading
02_code/R/Script 2 - Sentiment analysis.R +10 −12 Original line number Diff line number Diff line Loading @@ -27,9 +27,9 @@ library(stats) library(lubridate) #loading necessary dataset load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/rev.summary.RData") load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/reviews.clean.RData") load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/raw/rev.summary.RData") load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/reviews.clean.RData") # Only keeping necessary columns for analysis Loading @@ -38,7 +38,7 @@ rev.sentiment <- data.frame(id= reviews.clean$recommendationid, reviews = revie # Manual sentiment analysis----------------------------------------------------- # Loading of necessary words lists taken from Kaggel by Hu and Bing Liu setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/00_docs") setwd("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/00_docs") positive.words <- readLines("positive-words.txt") negative.words <- readLines("negative-words.txt") Loading Loading @@ -245,12 +245,13 @@ print(avg.sent.score) # A review score based from revsummary file, it was dowanloaded in the 1st assisgnement overall.review.score <- revsummary$review_score print(overall.review.score) # Calculating correlation betwwen an average sentiment score and overall review score correlation <- cor(avg.sent.score, overall.review.score) # Calculating correlation between an average sentiment score and overall review score # correlation <- cor(avg.sent.score, overall.review.score) # Printing the correlation coefficient print(paste("Correlation between average sentiment and overall score:", correlation)) # print(paste("Correlation between average sentiment and overall score:", correlation)) # Q4 Sentiment variation ------------------------------------------------------- Loading Loading @@ -281,15 +282,12 @@ ggplot(data = data.frame(date = as.Date(names(total.score.date)), ggsave(file= "C:/Users/hetvi/OneDrive/Desktop/Sentiment variation.jpg", width=15, height=15, units = "cm", dpi=1600) # checking on highest sentiment date high.index <- which.ma # Finding the index of the review with the highest helpful votes high.index <- which.max(total.score.date$total_sentiment) # Getting the corresponding review # Getting the corresponding date high.count <- total.score.date[high.index,] print(high.count) # Printing a result cat("A highest sentiment score", high.count$total_sentiment, "was recorded on", format(high.count$date, "%Y-%m-%d")) cat("A highest sentiment score of", high.count$total_sentiment, "was recorded on", format(high.count$date, "%Y-%m-%d"))