Commit 4a1691d1 authored by Hetvi Ariwala's avatar Hetvi Ariwala
Browse files

Minute errors found and corrected.

parent 90c89d70
Loading
Loading
Loading
Loading
+10 −19
Original line number Diff line number Diff line
@@ -32,28 +32,28 @@ load("D:/Hohenheim/SEM 3/ADS/introads_ass2_team18/01_data/raw/gamereviews.RData"
# Text cleaning: work on a copy of `gamereviews`; the raw text stays in column
# `review`, and each step below overwrites the cleaned text in column `reviews`.
# The print() calls after each step show one sample review for a manual check.
reviews.clean <- gamereviews

reviews.clean$reviews <- gsub("[^\\x00-\\x7F]+", "", reviews.clean$review, perl = TRUE) # Remove runs of non-ASCII characters (reads the raw `review` column)
print(reviews.clean$reviews[4546])
print(reviews.clean$reviews[4546]) # Updated review

# NOTE(review): the pattern keeps everything before "n't" as the stem, so
# "don't" -> "do not", but "can't" -> "ca not" and "won't" -> "wo not".
reviews.clean$reviews <- gsub("\\b(\\w+)n't", "\\1 not", reviews.clean$reviews) # Expand the contraction suffix "n't" into " not"
print(reviews.clean$reviews[86])
print(reviews.clean$reviews[86]) # Updated review

reviews.clean$reviews <- gsub("http[s]?://\\S+", "", reviews.clean$reviews) # Remove http/https URLs (up to the next whitespace)
print(reviews.clean$reviews[2296])
print(reviews.clean$reviews[2296]) # Updated review

# NOTE(review): only runs of three or more identical characters are collapsed,
# and the replacement appends a space after the kept character.
reviews.clean$reviews <- gsub("(.)\\1{2,}", "\\1 ", reviews.clean$reviews) # Collapse runs of 3+ identical characters into one character plus a space
print(reviews.clean$reviews[76])
print(reviews.clean$reviews[76]) # Updated review

reviews.clean$reviews <- gsub("\\d", "", reviews.clean$reviews)  # Remove digits
print(reviews.clean$reviews[10])
print(reviews.clean$reviews[10]) # Updated review

reviews.clean$reviews <- gsub("[[:punct:]]", "", reviews.clean$reviews) # Remove punctuation
print(reviews.clean$reviews[10])
print(reviews.clean$reviews[10]) # Updated review

reviews.clean$reviews <- gsub("\\n", " ", reviews.clean$reviews) # Replace newline characters with a space
print(reviews.clean$reviews[2296])
print(reviews.clean$reviews[2296]) # Updated review

reviews.clean$reviews <- gsub("\\s+", " ", reviews.clean$reviews)  # Collapse each run of whitespace into a single space
print(reviews.clean$reviews[2296])
print(reviews.clean$reviews[2296]) # Updated review

reviews.clean$reviews <- ifelse(nchar(reviews.clean$reviews) == 1, "", reviews.clean$reviews) # Blank out reviews that are exactly one character long

@@ -70,13 +70,11 @@ reviews.clean$reviews <- sapply(reviews.clean$reviews, function(review) {
  cleaned_review <- paste(filtered_words, collapse = " ")
  return(cleaned_review)
})
print(reviews.clean$review[69])
print(reviews.clean$reviews[69])
print(reviews.clean$review[69]) # Original review
print(reviews.clean$reviews[69]) # Updated review

reviews.clean$reviews <- trimws(reviews.clean$reviews) # Strip leading and trailing whitespace from each cleaned review



# Spell checking of all reviews---------------------------------------------

# Loop through all reviews to check spelling against the US English dictionary
@@ -133,10 +131,3 @@ word.freq <- data.frame(table(all.words))
# Draw a word cloud from the word-frequency table `word.freq`
# (random light colors on a black background)
wordcloud2(data = word.freq, size=1.2,
                          color = "random-light", backgroundColor = "black")

# Obtaining word stems
#word.stems <- wordStem(all.words.wof)

# Creating a word cloud using word stems
#wordcloud2(table(word.stems), size = 1.2, color = "random-light", 
#           backgroundColor = "black")