Loading 02_code/R/Script 1 - Data cleaning.R +8 −8 Original line number Diff line number Diff line Loading @@ -14,7 +14,7 @@ library(wordcloud2) library(SnowballC) #loading of necessary dataset load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") # Cleaning of the user reviews-------------------------------------------------- Loading Loading @@ -50,8 +50,8 @@ clean.review <- function(review) { return(cleaned.review) } # Applying function on each review which gives cleaned review reviews.clean$review <- sapply(reviews.clean$review, clean.review) # Applying function on each review which gives cleaned review and creates a new column to verify the change reviews.clean$reviews <- sapply(reviews.clean$review, clean.review) # Exclude observations with empty spaces reviews.clean <- reviews.clean[reviews.clean$review != "",] Loading @@ -66,7 +66,7 @@ reviews.clean <- reviews.clean[reviews.clean$review != "",] # Loop through all reviews to check spelling based on US english dictionary for (i in 1:nrow(reviews.clean)){ reviews <- reviews.clean$review[i] reviews <- reviews.clean$reviews[i] #extract words words <- unlist(strsplit(reviews, " ")) Loading @@ -81,11 +81,11 @@ for (i in 1:nrow(reviews.clean)){ correct_review <- paste(correct_words, collapse = " ") # update the dataframe reviews.clean$review[i] <- correct_review reviews.clean$reviews[i] <- correct_review } # Transform all upper case characters into lower case characters reviews.clean$review <- tolower(reviews.clean$review) reviews.clean$reviews <- tolower(reviews.clean$reviews) # Setting a working directory to save dataset setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data") Loading @@ -95,14 +95,14 @@ write.csv(reviews.clean, file = "reviews.clean.csv") # word cloud-------------------------------------------------------------------- # combine all reviews into a single text all.reviews <- 
paste(reviews.clean$review, collapse = "") all.reviews <- paste(reviews.clean$reviews, collapse = "") all.words <- character() # loop through all reviews for (i in 1:nrow(reviews.clean)) { #split the review into words tmp <- unlist(strsplit(reviews.clean$review[i], " ")) tmp <- unlist(strsplit(reviews.clean$reviews[i], " ")) # Concatenate the words to the all.words vector all.words <- c(all.words, tmp) } Loading Loading
02_code/R/Script 1 - Data cleaning.R +8 −8 Original line number Diff line number Diff line Loading @@ -14,7 +14,7 @@ library(wordcloud2) library(SnowballC) #loading of necessary dataset load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData") # Cleaning of the user reviews-------------------------------------------------- Loading Loading @@ -50,8 +50,8 @@ clean.review <- function(review) { return(cleaned.review) } # Applying function on each review which gives cleaned review reviews.clean$review <- sapply(reviews.clean$review, clean.review) # Applying function on each review which gives cleaned review and creates a new column to verify the change reviews.clean$reviews <- sapply(reviews.clean$review, clean.review) # Exclude observations with empty spaces reviews.clean <- reviews.clean[reviews.clean$review != "",] Loading @@ -66,7 +66,7 @@ reviews.clean <- reviews.clean[reviews.clean$review != "",] # Loop through all reviews to check spelling based on US english dictionary for (i in 1:nrow(reviews.clean)){ reviews <- reviews.clean$review[i] reviews <- reviews.clean$reviews[i] #extract words words <- unlist(strsplit(reviews, " ")) Loading @@ -81,11 +81,11 @@ for (i in 1:nrow(reviews.clean)){ correct_review <- paste(correct_words, collapse = " ") # update the dataframe reviews.clean$review[i] <- correct_review reviews.clean$reviews[i] <- correct_review } # Transform all upper case characters into lower case characters reviews.clean$review <- tolower(reviews.clean$review) reviews.clean$reviews <- tolower(reviews.clean$reviews) # Setting a working directory to save dataset setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data") Loading @@ -95,14 +95,14 @@ write.csv(reviews.clean, file = "reviews.clean.csv") # word cloud-------------------------------------------------------------------- # combine all reviews into a single text all.reviews <- 
paste(reviews.clean$review, collapse = "") all.reviews <- paste(reviews.clean$reviews, collapse = "") all.words <- character() # loop through all reviews for (i in 1:nrow(reviews.clean)) { #split the review into words tmp <- unlist(strsplit(reviews.clean$review[i], " ")) tmp <- unlist(strsplit(reviews.clean$reviews[i], " ")) # Concatenate the words to the all.words vector all.words <- c(all.words, tmp) } Loading