Commit 877bdf9c authored by Sarish
Browse files

small changes have been made to verify the data cleaning process on reviews

parent 2b66276a
Loading
Loading
Loading
Loading
+8 −8
Original line number Diff line number Diff line
@@ -14,7 +14,7 @@ library(wordcloud2)
library(SnowballC)

# Load the necessary dataset
load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData")
load("C:/Users/akluj/SEM_3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData")


# Cleaning of the user reviews--------------------------------------------------
@@ -50,8 +50,8 @@ clean.review <- function(review) {
  return(cleaned.review)
}

# Applying function on each review which gives cleaned review
reviews.clean$review <- sapply(reviews.clean$review, clean.review)
# Apply the cleaning function to each review and store the result in a new column so the change can be verified
reviews.clean$reviews <- sapply(reviews.clean$review, clean.review)

# Exclude observations with empty spaces
reviews.clean <- reviews.clean[reviews.clean$review != "",]
@@ -66,7 +66,7 @@ reviews.clean <- reviews.clean[reviews.clean$review != "",]
# Loop through all reviews to check spelling against a US English dictionary

for (i in 1:nrow(reviews.clean)){
  reviews <- reviews.clean$review[i]
  reviews <- reviews.clean$reviews[i]
  
  #extract words
  words <- unlist(strsplit(reviews, " "))
@@ -81,11 +81,11 @@ for (i in 1:nrow(reviews.clean)){
  correct_review <- paste(correct_words, collapse = " ")
  
  # update the dataframe
  reviews.clean$review[i] <- correct_review
  reviews.clean$reviews[i] <- correct_review
}

# Transform all upper-case characters to lower case
reviews.clean$review <- tolower(reviews.clean$review)
reviews.clean$reviews <- tolower(reviews.clean$reviews)

# Setting a working directory to save dataset
setwd("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data")
@@ -95,14 +95,14 @@ write.csv(reviews.clean, file = "reviews.clean.csv")

# word cloud--------------------------------------------------------------------
# combine all reviews into a single text
all.reviews <- paste(reviews.clean$review, collapse = "")
all.reviews <- paste(reviews.clean$reviews, collapse = "")


all.words <- character()
# loop through all reviews
for (i in 1:nrow(reviews.clean)) {
  #split the review into words
  tmp <- unlist(strsplit(reviews.clean$review[i], " "))
  tmp <- unlist(strsplit(reviews.clean$reviews[i], " "))
  # Concatenate the words to the all.words vector
  all.words <- c(all.words, tmp)
}