Commit c2306d11 authored by amra669c's avatar amra669c
Browse files

Update analysis scripts and results

parent b4ab99ac
Loading
Loading
Loading
Loading

00_docs/Schedule.pdf

deleted 100644 → 0
−77.8 KiB

File deleted.

+4 −1
Original line number Diff line number Diff line
@@ -76,7 +76,10 @@ summary(reviews_final$sentiment_nrc)
# Overall, the game is perceived positively, but the scores vary because
# NRC counts emotion words and depends on review length.


#-------------------------------------------
# Add a column containing the tanh-normalized NRC sentiment score
#-------------------------------------------
reviews_final$sentiment_nrc_tanh <- tanh(reviews_final$sentiment_nrc)

# save the file
save(reviews_final,
+120 −24
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@ graphics.off()
# ---------------------------
# Set working directory
# ---------------------------
## setwd()
#setwd()

# ---------------------------------------------------------------------------
# Load data
@@ -104,6 +104,7 @@ barplot(
  ylim = c(0, 5000)
)


# Most frequent NRC emotion
sum(reviews_merged$nrc_emotion_dominant == "anticipation", na.rm = TRUE)

@@ -123,6 +124,10 @@ reviews_merged$emotion_LLM <- vapply(

llm_counts <- table(reviews_merged$emotion_LLM, useNA = "no")


plot_llm_emotions <- function() {
  par(mar = c(8, 6, 4, 2))
  
  barplot(
    llm_counts,
    main = "Emotions (LLM)",
@@ -133,16 +138,15 @@ barplot(
    ylim = c(0, 5000)
  )
  
  mtext("Emotion",   side = 1, line = 6)
  mtext("Frequency", side = 2, line = 5)
  
  par(mar = c(5, 4, 4, 2))
}


plot_llm_emotions()

barplot(
  llm_counts,
  main = "Emotions (LLM)",
  xlab = "Emotion",
  ylab = "Frequency",
  col  = "lightgreen",
  las  = 2,
  ylim = c(0, 5000)
)

# Most frequent LLM emotion
sum(reviews_merged$emotion_LLM == "joy", na.rm = TRUE)
@@ -190,6 +194,7 @@ points(
  pch = 19
)


# ---------------------------------------------------------------------------
# 6) Extreme sentiment and helpfulness
# ---------------------------------------------------------------------------
@@ -273,7 +278,6 @@ library(ggplot2)
library(dplyr)
library(tidyr)

# Density comparison (same colors and format)
df_long <- data.frame(
  score = c(
    reviews_final$sentiment_manual,
@@ -286,7 +290,7 @@ df_long <- data.frame(
  )
)

ggplot(df_long, aes(x = score, color = method)) +
p_density <- ggplot(df_long, aes(x = score, color = method)) +
  geom_density(size = 1.2) +
  scale_color_manual(values = c(
    "Manual" = "lightgreen",
@@ -302,6 +306,18 @@ ggplot(df_long, aes(x = score, color = method)) +
  theme_minimal() +
  theme(legend.position = "right")

# Draw the density comparison on the active graphics device
print(p_density)

# Make sure the output folder exists before saving: ggsave() does not create
# missing directories by default. dir.create() is a silent no-op when the
# folder is already present.
dir.create("03_report/graphs", recursive = TRUE, showWarnings = FALSE)

# Save the same plot as a 7 x 5 inch PNG at 300 dpi
ggsave(
  filename = "03_report/graphs/density_sentiment_comparison.png",
  plot     = p_density,
  width    = 7,
  height   = 5,
  dpi      = 300
)

# ---------------------------------------------------------------------------
# Barplot: Emotion distribution comparison (LLM vs NRC)
# ---------------------------------------------------------------------------
@@ -319,7 +335,7 @@ emotion_df <- rbind(
  )
)

ggplot(emotion_df, aes(x = reorder(emotion, count, FUN = mean),
p_emotion <- ggplot(emotion_df, aes(x = reorder(emotion, count, FUN = mean),
                                    y = count,
                                    fill = method)) +
  geom_col(position = "dodge", alpha = 0.8) +
@@ -335,4 +351,84 @@ ggplot(emotion_df, aes(x = reorder(emotion, count, FUN = mean),
  theme_minimal() +
  theme(legend.position = "right")

# Draw the LLM-vs-NRC emotion comparison on the active graphics device
print(p_emotion)

# Make sure the output folder exists before saving: ggsave() does not create
# missing directories by default. dir.create() is a silent no-op when the
# folder is already present.
dir.create("03_report/graphs", recursive = TRUE, showWarnings = FALSE)

# Save the same plot as a 7 x 5 inch PNG at 300 dpi
ggsave(
  "03_report/graphs/emotion_comparison.png",
  plot   = p_emotion,
  width  = 7,
  height = 5,
  dpi    = 300
)


# ---------------------------------------------------------------------------
# Prepare data for boxplot (LLM vs NRC by voted_up)
# ---------------------------------------------------------------------------

# Long-format table for the boxplot: the rows of reviews_merged are stacked
# twice, once per method, keeping voted_up, the score and a method label.
# NOTE(review): assumes reviews_merged already carries sentiment_nrc_tanh
# (the tanh-transformed NRC score) -- confirm against the preparation script.
df_box <- bind_rows(
  # LLM sentiment, used as-is (expected to lie in [-1, 1] already)
  reviews_merged %>%
    select(voted_up, sentiment_score = sentiment) %>%
    mutate(method = "LLM"),
  # NRC sentiment after tanh normalization
  reviews_merged %>%
    select(voted_up, sentiment_score = sentiment_nrc_tanh) %>%
    mutate(method = "NRC")
)

# ---------------------------------------------------------------------------
# Boxplot
# ---------------------------------------------------------------------------

# Side-by-side boxplots of the sentiment score per voted_up value, one box
# per method; a red dot marks the mean of each box.
p_box <- ggplot(
  data    = df_box,
  mapping = aes(x = voted_up, y = sentiment_score, fill = method)
) +
  # LLM and NRC boxes sit next to each other within each voted_up value
  geom_boxplot(position = position_dodge(width = 0.8), alpha = 0.7) +
  # Same dodge width as the boxes so each mean marker lands on its own box
  stat_summary(
    geom     = "point",
    fun      = mean,
    position = position_dodge(width = 0.8),
    color    = "red",
    shape    = 19,
    size     = 3
  ) +
  scale_fill_manual(values = c("LLM" = "lightblue", "NRC" = "lightcoral")) +
  labs(x = "Voted Up", y = "Sentiment score (-1 to 1)", fill = "Method") +
  theme_minimal() +
  theme(legend.position = "right")

# Draw the LLM-vs-NRC sentiment boxplot on the active graphics device
print(p_box)

# Make sure the output folder exists before saving: ggsave() does not create
# missing directories by default. dir.create() is a silent no-op when the
# folder is already present.
dir.create("03_report/graphs", recursive = TRUE, showWarnings = FALSE)

# Save the same plot as a 7 x 5 inch PNG at 300 dpi
ggsave(
  "03_report/graphs/sentiment_boxplot_LLM_NRC.png",
  plot = p_box,
  width = 7,
  height = 5,
  dpi = 300
)



−4.82 KiB

File deleted.

Loading