-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdecayfactorsentimentest.R
151 lines (133 loc) · 8.99 KB
/
decayfactorsentimentest.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#' Predict the mean sentiment of a paragraph with a pre-trained model
#'
#' Strips punctuation and digits from `paragraph`, lower-cases it, splits it
#' into word tokens, scores every token with `get_sentiment(method = "afinn")`,
#' aggregates the scores into one row of count/ratio features and passes that
#' row to `predict()`.
#'
#' @param paragraph Text to score.
#' @param sentiment_model_2 Fitted model handed to `predict()` together with
#'   the one-row feature data frame as `newdata`.
#' @return A data frame with the columns `paragraph` and
#'   `predicted_mean_sentiment`. The prediction is `NA` when the text yields no
#'   tokens or when a scaling factor is zero/NA (a warning is raised in the
#'   latter case), and `0` when the weighted positive and negative impacts are
#'   equal (which includes text without any scored word).
predict_mean_sentiment <- function(paragraph, sentiment_model_2) {
  # Single place that shapes every return value, so early exits and the
  # regular result share column names and types.
  build_result <- function(value) {
    data.frame(
      paragraph = paragraph,
      predicted_mean_sentiment = value,
      stringsAsFactors = FALSE
    )
  }

  # Clean the paragraph: drop punctuation and digits, then lower-case.
  paragraph_clean <- gsub("[[:punct:]]", "", paragraph) %>%
    gsub("\\d+", "", .) %>%
    tolower()

  # Tokenize into one word per row.
  paragraph_tokenized <- data.frame(
    paragraph_clean = paragraph_clean,
    stringsAsFactors = FALSE
  ) %>%
    unnest_tokens(word, paragraph_clean)

  # Nothing left to score (empty or punctuation/digit-only input).
  if (nrow(paragraph_tokenized) == 0) {
    return(build_result(NA_real_))
  }

  # Score every token and count tokens per score bucket.
  # NOTE(review): the "moderate" bucket is score +/-2 and "medium" is +/-3;
  # the names are kept as-is because the model presumably expects them.
  paragraph_counts <- paragraph_tokenized %>%
    mutate(sentiment_score = get_sentiment(word, method = "afinn")) %>%
    summarise(
      total_words = n(),
      total_sentiment = sum(sentiment_score, na.rm = TRUE),
      total_emotion_words = sum(sentiment_score != 0, na.rm = TRUE),
      neutral_count = sum(sentiment_score == 0, na.rm = TRUE),
      extreme_positive_count = sum(sentiment_score >= 4, na.rm = TRUE),
      extreme_negative_count = sum(sentiment_score <= -4, na.rm = TRUE),
      moderate_positive_count = sum(sentiment_score == 2, na.rm = TRUE),
      moderate_negative_count = sum(sentiment_score == -2, na.rm = TRUE),
      medium_positive_count = sum(sentiment_score == 3, na.rm = TRUE),
      medium_negative_count = sum(sentiment_score == -3, na.rm = TRUE),
      slight_positive_count = sum(sentiment_score == 1, na.rm = TRUE),
      slight_negative_count = sum(sentiment_score == -1, na.rm = TRUE)
    )

  # Denominators guarded against zero. pmax() replaces
  # if_else(count > 0, count, 1): same value, but it does not mix an integer
  # `true` with a double `false`, which older dplyr releases reject.
  # They are kept as locals (not columns) so `newdata` has the same columns
  # as before.
  emotion_denominator <- pmax(paragraph_counts$total_emotion_words, 1)
  word_denominator <- pmax(paragraph_counts$total_words, 1)

  # Derive the ratio features. Column names and order are unchanged.
  # NOTE(review): which of these columns the model reads is not visible
  # here, so all of them are preserved.
  paragraph_aggregated <- paragraph_counts %>%
    mutate(
      adjusted_mean_sentiment = total_sentiment / emotion_denominator,
      # Tokens whose score is NA: neither counted as emotion nor as neutral.
      buffer_count = total_words - (total_emotion_words + neutral_count),
      extreme_positive_ratio = extreme_positive_count / emotion_denominator,
      extreme_negative_ratio = extreme_negative_count / emotion_denominator,
      moderate_positive_ratio = moderate_positive_count / emotion_denominator,
      moderate_negative_ratio = moderate_negative_count / emotion_denominator,
      medium_positive_ratio = medium_positive_count / emotion_denominator,
      medium_negative_ratio = medium_negative_count / emotion_denominator,
      slight_positive_ratio = slight_positive_count / emotion_denominator,
      slight_negative_ratio = slight_negative_count / emotion_denominator,
      neutral_ratio = neutral_count / word_denominator,
      negative_impact = 1.5 * slight_negative_ratio +
        2 * medium_negative_ratio +
        2.5 * moderate_negative_ratio +
        3 * extreme_negative_ratio,
      positive_impact = 1.5 * slight_positive_ratio +
        2 * medium_positive_ratio +
        2.5 * moderate_positive_ratio +
        3 * extreme_positive_ratio,
      interaction_term = negative_impact * positive_impact,
      squared_negative_impact = negative_impact^2,
      squared_positive_impact = positive_impact^2,
      buffer_ratio = buffer_count / word_denominator,
      log_emotion_ratio = log1p(total_emotion_words / total_words),
      exp_buffer_neutral_ratio = exp(
        min(neutral_count + buffer_count, 10) / total_words
      ),
      decay_factor = exp(-(neutral_count + buffer_count) / total_words),
      mean_sentiment = total_sentiment / total_emotion_words,
      adjusted_mean_sentiment = adjusted_mean_sentiment * decay_factor
    )

  # Scaling factors taken from the paragraph itself. The frame has exactly
  # one row, so the former max() calls were identities and are dropped.
  # The unused lookup of the global `train_data` is removed as well: it made
  # the function fail whenever that object did not exist.
  buffer_scale <- paragraph_aggregated$buffer_ratio
  neutral_scale <- paragraph_aggregated$neutral_ratio

  # NOTE(review): the impact scales are the *net* differences (positive minus
  # negative and vice versa), not the raw impacts. Kept unchanged so that
  # predictions stay the same; confirm this matches how the model was trained.
  net_positive_impact <- paragraph_aggregated$positive_impact -
    paragraph_aggregated$negative_impact
  net_negative_impact <- paragraph_aggregated$negative_impact -
    paragraph_aggregated$positive_impact

  # A zero buffer scale borrows the neutral scale; otherwise a zero neutral
  # scale borrows the buffer scale.
  if (isTRUE(buffer_scale == 0)) {
    buffer_scale <- neutral_scale
  } else if (isTRUE(neutral_scale == 0)) {
    neutral_scale <- buffer_scale
  }

  # The two net impacts are exact negations of each other, so one test covers
  # "both are zero": positive and negative impact cancel out (or are absent).
  if (isTRUE(net_positive_impact == 0)) {
    return(build_result(0))
  }

  # Ensure no scaling factor is zero or NA. Buffer and neutral scale are both
  # zero when every token carries a non-zero score.
  scale_factors <- c(
    buffer_scale, neutral_scale, net_positive_impact, net_negative_impact
  )
  if (any(scale_factors == 0 | is.na(scale_factors))) {
    warning("One or more max scaling factors are zero or NA.")
    return(build_result(NA_real_))
  }

  # Apply scaling. All factors are known to be non-zero here, so the former
  # zero-guards inside this step could never trigger and are removed.
  paragraph_aggregated <- paragraph_aggregated %>%
    mutate(
      scaled_buffer_ratio = buffer_ratio / buffer_scale,
      scaled_neutral_ratio = neutral_ratio / neutral_scale,
      scaled_positive_ratio = positive_impact / net_positive_impact,
      scaled_negative_ratio = negative_impact / net_negative_impact
    )

  # Predict and return the original paragraph with its predicted sentiment.
  predicted_sentiment <- predict(
    sentiment_model_2,
    newdata = paragraph_aggregated
  )
  build_result(predicted_sentiment)
}
# Example usage ----
# These statements are top-level, so they run whenever this file is sourced.
# Sample text to score.
paragraph <- "My day is ruined and my life is over."
# Load the saved model object; the relative path is resolved against the
# current working directory, and readRDS() stops with an error if the file
# cannot be opened.
loaded_model_2 <- readRDS("sentiment_model_2.rds") # Replace with the actual path to your saved model
# Returns a data frame holding the paragraph and its predicted mean sentiment.
predicted_sentiment <- predict_mean_sentiment(paragraph, loaded_model_2)
print(predicted_sentiment)