# 02_code/R/Script 2 - Sentiment analysis.R
#
# Sentiment analysis of review texts: a manual score, three syuzhet
# dictionaries (bing, afinn, nrc), and a word cloud of aggregated NRC scores.
#
# NOTE(review): this text was reconstructed from a flattened diff view showing
# two hunks (@@ -2,9 +2,19 @@ and @@ -58,3 +68,64 @@). Lines of the script
# outside those hunks are not visible here and are marked below.

# NOTE(review): clearing the global environment and calling setwd() with an
# absolute path make the script non-portable; kept because the code that is
# not visible here may depend on them.
rm(list = ls())

# Installation of packages ------------------------------------------------------
# Install only the packages that are not available yet, instead of
# re-installing all of them on every run.
required.packages <- c(
  "stringr", "syuzhet", "NLP", "tm", "wordcloud", "RColorBrewer"
)
missing.packages <- required.packages[
  !vapply(required.packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing.packages) > 0) {
  install.packages(missing.packages)
}

# Loading of packages -----------------------------------------------------------
library(stringr)
library(syuzhet)
library(NLP)
library(tm)
library(wordcloud)
library(RColorBrewer)

# Loading necessary dataset -----------------------------------------------------
setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data")

# ... lines between the two diff hunks are not shown in this view ...
# NOTE(review): `rev.sentiment` (with a `reviews` column) and `sent.analysis`
# are used below and are presumably defined in the hidden lines - verify.

# Manual sentiment score for every review
rev.sentiment$sentiment.score <- sapply(rev.sentiment$reviews, sent.analysis)

# Labeling the sentiment score with sentiment
rev.sentiment$sentiment.label <- ifelse(
  rev.sentiment$sentiment.score == 1, "Positive",
  ifelse(rev.sentiment$sentiment.score == -1, "Negative", "Neutral")
)

# Counting the total number of reviews per label.
# The trailing "\n" keeps each message on its own line when the script is
# sourced.
positive.counts <- sum(rev.sentiment$sentiment.label == "Positive")
cat("The total number of positive reviews are", positive.counts, "\n")

negative.counts <- sum(rev.sentiment$sentiment.label == "Negative")
cat("The total number of negative reviews are", negative.counts, "\n")

neutral.counts <- sum(rev.sentiment$sentiment.label == "Neutral")
cat("The total number of neutral reviews are", neutral.counts, "\n")

# Syuzhet sentiment analysis using bing dictionary ------------------------------

# Sentiment analysis using the Bing dictionary
rev.sentiment$score.syuzhet <- get_sentiment(
  rev.sentiment$reviews,
  method = "bing"
)

# Labeling the sentiment score with sentiment
rev.sentiment$sent.label.syuzhet <- ifelse(
  rev.sentiment$score.syuzhet >= 1, "Positive",
  ifelse(rev.sentiment$score.syuzhet <= -1, "Negative", "Neutral")
)

# Compare the manual score with the syuzhet score and flag identical ones.
# The syuzhet score is a column of rev.sentiment, so it must be referenced via
# `rev.sentiment$`; a bare `score.syuzhet` is not defined in the visible code.
# NOTE(review): the manual score is labeled with == 1 / == -1 while the bing
# score is labeled with >= 1 / <= -1, so the raw scores may live on different
# scales; comparing the labels may be what is intended - confirm.
rev.sentiment$identical.score <- ifelse(
  rev.sentiment$sentiment.score == rev.sentiment$score.syuzhet, 1, 0
)

# Calculate the percentage of identical scores
percentage.identical <- mean(rev.sentiment$identical.score) * 100

# Display the result
cat("Percentage of Identical Score:", percentage.identical, "%\n")

# Sentiment analysis using two other dictionaries from the syuzhet package ------

# Sentiment score using AFINN dictionary
rev.sentiment$score.afinn <- get_sentiment(
  rev.sentiment$reviews,
  method = "afinn"
)

# Labeling sentiment score with sentiment
rev.sentiment$sent.label.afinn <- ifelse(
  rev.sentiment$score.afinn >= 1, "Positive",
  ifelse(rev.sentiment$score.afinn <= -1, "Negative", "Neutral")
)

# Sentiment score using nrc dictionary.
# The result is summed column-wise below, i.e. it is stored as a
# multi-column object inside rev.sentiment.
rev.sentiment$score.nrc <- get_nrc_sentiment(rev.sentiment$reviews)

# Wordcloud based on emotions ---------------------------------------------------

reviews <- rev.sentiment$reviews

# NRC scores computed above, one row per review
nrc_scores <- rev.sentiment$score.nrc

# Sum the NRC scores for each emotion across all reviews
aggregate_nrc <- colSums(nrc_scores, na.rm = TRUE)

# Convert the aggregated scores to a data frame
nrc_aggregated_df <- data.frame(
  emotion = names(aggregate_nrc),
  score = aggregate_nrc
)

# Remove rows with NAs (if any)
nrc_aggregated_df <- nrc_aggregated_df[complete.cases(nrc_aggregated_df), ]

# Create the word cloud: each emotion name is sized by its aggregated score
wordcloud(
  words = nrc_aggregated_df$emotion,
  freq = nrc_aggregated_df$score,
  min.freq = 1,
  scale = c(3, 0.5),
  colors = brewer.pal(8, "Dark2")
)
# 02_code/R/Script 2 - Sentiment analysis.R
#
# Sentiment analysis of review texts: a manual score, three syuzhet
# dictionaries (bing, afinn, nrc), and a word cloud of aggregated NRC scores.
#
# NOTE(review): this text was reconstructed from a flattened diff view showing
# two hunks (@@ -2,9 +2,19 @@ and @@ -58,3 +68,64 @@). Lines of the script
# outside those hunks are not visible here and are marked below.

# NOTE(review): clearing the global environment and calling setwd() with an
# absolute path make the script non-portable; kept because the code that is
# not visible here may depend on them.
rm(list = ls())

# Installation of packages ------------------------------------------------------
# Install only the packages that are not available yet, instead of
# re-installing all of them on every run.
required.packages <- c(
  "stringr", "syuzhet", "NLP", "tm", "wordcloud", "RColorBrewer"
)
missing.packages <- required.packages[
  !vapply(required.packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing.packages) > 0) {
  install.packages(missing.packages)
}

# Loading of packages -----------------------------------------------------------
library(stringr)
library(syuzhet)
library(NLP)
library(tm)
library(wordcloud)
library(RColorBrewer)

# Loading necessary dataset -----------------------------------------------------
setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data")

# ... lines between the two diff hunks are not shown in this view ...
# NOTE(review): `rev.sentiment` (with a `reviews` column) and `sent.analysis`
# are used below and are presumably defined in the hidden lines - verify.

# Manual sentiment score for every review
rev.sentiment$sentiment.score <- sapply(rev.sentiment$reviews, sent.analysis)

# Labeling the sentiment score with sentiment
rev.sentiment$sentiment.label <- ifelse(
  rev.sentiment$sentiment.score == 1, "Positive",
  ifelse(rev.sentiment$sentiment.score == -1, "Negative", "Neutral")
)

# Counting the total number of reviews per label.
# The trailing "\n" keeps each message on its own line when the script is
# sourced.
positive.counts <- sum(rev.sentiment$sentiment.label == "Positive")
cat("The total number of positive reviews are", positive.counts, "\n")

negative.counts <- sum(rev.sentiment$sentiment.label == "Negative")
cat("The total number of negative reviews are", negative.counts, "\n")

neutral.counts <- sum(rev.sentiment$sentiment.label == "Neutral")
cat("The total number of neutral reviews are", neutral.counts, "\n")

# Syuzhet sentiment analysis using bing dictionary ------------------------------

# Sentiment analysis using the Bing dictionary
rev.sentiment$score.syuzhet <- get_sentiment(
  rev.sentiment$reviews,
  method = "bing"
)

# Labeling the sentiment score with sentiment
rev.sentiment$sent.label.syuzhet <- ifelse(
  rev.sentiment$score.syuzhet >= 1, "Positive",
  ifelse(rev.sentiment$score.syuzhet <= -1, "Negative", "Neutral")
)

# Compare the manual score with the syuzhet score and flag identical ones.
# The syuzhet score is a column of rev.sentiment, so it must be referenced via
# `rev.sentiment$`; a bare `score.syuzhet` is not defined in the visible code.
# NOTE(review): the manual score is labeled with == 1 / == -1 while the bing
# score is labeled with >= 1 / <= -1, so the raw scores may live on different
# scales; comparing the labels may be what is intended - confirm.
rev.sentiment$identical.score <- ifelse(
  rev.sentiment$sentiment.score == rev.sentiment$score.syuzhet, 1, 0
)

# Calculate the percentage of identical scores
percentage.identical <- mean(rev.sentiment$identical.score) * 100

# Display the result
cat("Percentage of Identical Score:", percentage.identical, "%\n")

# Sentiment analysis using two other dictionaries from the syuzhet package ------

# Sentiment score using AFINN dictionary
rev.sentiment$score.afinn <- get_sentiment(
  rev.sentiment$reviews,
  method = "afinn"
)

# Labeling sentiment score with sentiment
rev.sentiment$sent.label.afinn <- ifelse(
  rev.sentiment$score.afinn >= 1, "Positive",
  ifelse(rev.sentiment$score.afinn <= -1, "Negative", "Neutral")
)

# Sentiment score using nrc dictionary.
# The result is summed column-wise below, i.e. it is stored as a
# multi-column object inside rev.sentiment.
rev.sentiment$score.nrc <- get_nrc_sentiment(rev.sentiment$reviews)

# Wordcloud based on emotions ---------------------------------------------------

reviews <- rev.sentiment$reviews

# NRC scores computed above, one row per review
nrc_scores <- rev.sentiment$score.nrc

# Sum the NRC scores for each emotion across all reviews
aggregate_nrc <- colSums(nrc_scores, na.rm = TRUE)

# Convert the aggregated scores to a data frame
nrc_aggregated_df <- data.frame(
  emotion = names(aggregate_nrc),
  score = aggregate_nrc
)

# Remove rows with NAs (if any)
nrc_aggregated_df <- nrc_aggregated_df[complete.cases(nrc_aggregated_df), ]

# Create the word cloud: each emotion name is sized by its aggregated score
wordcloud(
  words = nrc_aggregated_df$emotion,
  freq = nrc_aggregated_df$score,
  min.freq = 1,
  scale = c(3, 0.5),
  colors = brewer.pal(8, "Dark2")
)