-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheckthisoutbrah.R
144 lines (124 loc) · 8.22 KB
/
checkthisoutbrah.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#' Predict the mean sentiment of a paragraph
#'
#' Strips punctuation and digits from `paragraph`, lower-cases it, splits it
#' into word tokens, scores each token with `syuzhet::get_sentiment()` using
#' the "afinn" method, aggregates the scores into count and ratio features,
#' rescales four of them, and hands the resulting one-row data frame to
#' `predict()`.
#'
#' Package functions are called as `pkg::fun()` so the function does not rely
#' on dplyr, tidytext or syuzhet having been attached by the caller.
#'
#' @param paragraph Text to score. NOTE(review): all tokens are pooled into a
#'   single feature row, so a length-one character vector is presumably
#'   expected -- confirm against callers.
#' @param loaded_model Fitted model object; passed unchanged to `predict()`
#'   together with the feature data frame as `newdata`.
#' @param verbose If `TRUE`, print the feature table, the scaling factors and
#'   the scaled feature table while running. Defaults to `FALSE`, which
#'   suppresses that diagnostic output.
#' @return A data frame with the columns `paragraph` and
#'   `predicted_mean_sentiment`. The prediction is `NA` when the text yields
#'   no tokens or when a scaling factor is zero or `NA` (the latter case also
#'   raises a warning), and `0` when the weighted positive and negative
#'   ratios cancel out, which includes text without any non-zero score.
predict_mean_sentiment <- function(paragraph, loaded_model, verbose = FALSE) {
  # Every exit path returns the same two-column shape.
  build_result <- function(value) {
    data.frame(
      paragraph = paragraph,
      predicted_mean_sentiment = value,
      stringsAsFactors = FALSE
    )
  }

  # Division that treats an empty denominator as 1. Written with pmax() rather
  # than if_else(total > 0, total, 1), which mixes an integer and a double
  # branch and is rejected by dplyr releases with a type-strict if_else().
  safe_ratio <- function(count, total) {
    count / pmax(total, 1)
  }

  # Weighted sum of the four intensity ratios of one polarity.
  # NOTE(review): "medium" (|score| == 3) is weighted 2 while "moderate"
  # (|score| == 2) is weighted 2.5; kept unchanged because the fitted model
  # presumably expects features built with these weights -- confirm.
  weighted_impact <- function(slight, medium, moderate, extreme) {
    1.5 * slight + 2 * medium + 2.5 * moderate + 3 * extreme
  }

  # Clean the paragraph: drop punctuation, then digits, then lower-case.
  cleaned <- tolower(gsub("\\d+", "", gsub("[[:punct:]]", "", paragraph)))

  # Text that is blank after cleaning cannot produce tokens, so answer
  # directly instead of calling the tokenizer.
  if (!any(nzchar(trimws(cleaned)))) {
    return(build_result(NA))
  }

  # Tokenize the paragraph into one row per word.
  tokens <- tidytext::unnest_tokens(
    data.frame(paragraph_clean = cleaned, stringsAsFactors = FALSE),
    word,
    paragraph_clean
  )
  if (nrow(tokens) == 0) {
    return(build_result(NA))
  }

  # Calculate sentiment scores.
  scored <- dplyr::mutate(
    tokens,
    sentiment_score = syuzhet::get_sentiment(word, method = "afinn")
  )

  # Aggregate the scores into a single row of counts.
  counts <- dplyr::summarise(
    scored,
    total_words = dplyr::n(),
    total_sentiment = sum(sentiment_score, na.rm = TRUE),
    total_emotion_words = sum(sentiment_score != 0, na.rm = TRUE),
    neutral_count = sum(sentiment_score == 0, na.rm = TRUE),
    extreme_positive_count = sum(sentiment_score >= 4, na.rm = TRUE),
    extreme_negative_count = sum(sentiment_score <= -4, na.rm = TRUE),
    moderate_positive_count = sum(sentiment_score == 2, na.rm = TRUE),
    moderate_negative_count = sum(sentiment_score == -2, na.rm = TRUE),
    medium_positive_count = sum(sentiment_score == 3, na.rm = TRUE),
    medium_negative_count = sum(sentiment_score == -3, na.rm = TRUE),
    slight_positive_count = sum(sentiment_score == 1, na.rm = TRUE),
    slight_negative_count = sum(sentiment_score == -1, na.rm = TRUE),
    neutral_ratio = safe_ratio(neutral_count, total_words)
  )

  # Derive the ratio features. buffer_count is what remains after removing
  # the non-zero and the zero scores, so it is 0 unless a score is NA; in
  # that case buffer_ratio falls back to neutral_ratio (and vice versa).
  features <- dplyr::mutate(
    counts,
    mean_sentiment = safe_ratio(total_sentiment, total_emotion_words),
    buffer_count = total_words - (total_emotion_words + neutral_count),
    extreme_positive_ratio =
      safe_ratio(extreme_positive_count, total_emotion_words),
    extreme_negative_ratio =
      safe_ratio(extreme_negative_count, total_emotion_words),
    moderate_positive_ratio =
      safe_ratio(moderate_positive_count, total_emotion_words),
    moderate_negative_ratio =
      safe_ratio(moderate_negative_count, total_emotion_words),
    medium_positive_ratio =
      safe_ratio(medium_positive_count, total_emotion_words),
    medium_negative_ratio =
      safe_ratio(medium_negative_count, total_emotion_words),
    slight_positive_ratio =
      safe_ratio(slight_positive_count, total_emotion_words),
    slight_negative_ratio =
      safe_ratio(slight_negative_count, total_emotion_words),
    buffer_ratio = dplyr::if_else(
      buffer_count == 0,
      neutral_ratio,
      safe_ratio(buffer_count, total_words)
    ),
    neutral_ratio = dplyr::if_else(
      neutral_count == 0,
      buffer_ratio,
      safe_ratio(neutral_count, total_words)
    )
  )
  if (verbose) {
    print(features)
  }

  # Scaling factors. `features` has exactly one row, so each "maximum" used
  # for scaling is simply that row's own value.
  buffer_scale <- features$buffer_ratio
  neutral_scale <- features$neutral_ratio
  positive_impact <- weighted_impact(
    features$slight_positive_ratio,
    features$medium_positive_ratio,
    features$moderate_positive_ratio,
    features$extreme_positive_ratio
  )
  negative_impact <- weighted_impact(
    features$slight_negative_ratio,
    features$medium_negative_ratio,
    features$moderate_negative_ratio,
    features$extreme_negative_ratio
  )
  net_positive <- positive_impact - negative_impact
  net_negative <- negative_impact - positive_impact

  # A zero buffer scale borrows the neutral scale, otherwise a zero neutral
  # scale borrows the buffer scale.
  if (buffer_scale == 0) {
    buffer_scale <- neutral_scale
  } else if (neutral_scale == 0) {
    neutral_scale <- buffer_scale
  }

  # Positive and negative impact cancel out: report a sentiment of 0.
  # Compared with a tolerance because the two weighted sums can differ by
  # rounding noise only, which would otherwise become a near-zero divisor
  # below. A genuine difference is at least 0.5 / total_emotion_words, far
  # above the tolerance for any realistic text length.
  if (abs(net_positive) < sqrt(.Machine$double.eps)) {
    return(build_result(0))
  }

  # Ensure no scaling factor is zero or NA before dividing by it.
  scale_factors <- c(buffer_scale, neutral_scale, net_positive, net_negative)
  if (verbose) {
    print(scale_factors)
  }
  if (any(scale_factors == 0 | is.na(scale_factors))) {
    warning("One or more max scaling factors are zero or NA.")
    return(build_result(NA))
  }

  # Apply scaling. All factors are known to be non-zero at this point, so no
  # zero-divisor fallbacks are needed.
  features <- dplyr::mutate(
    features,
    scaled_buffer_ratio = buffer_ratio / buffer_scale,
    scaled_neutral_ratio = neutral_ratio / neutral_scale,
    scaled_positive_ratio = positive_impact / net_positive,
    scaled_negative_ratio = negative_impact / net_negative
  )
  if (verbose) {
    print(features)
  }

  # Predict from the complete feature row and return it with the input text.
  prediction <- predict(loaded_model, newdata = features)
  build_result(prediction)
}
# Example usage
# Packages predict_mean_sentiment() draws on (dplyr verbs and the pipe,
# tidytext::unnest_tokens(), syuzhet::get_sentiment()); attached here so the
# script also runs in a fresh R session.
library(dplyr)
library(tidytext)
library(syuzhet)

paragraph <- "Yesterday started off great. The sun was shining, birds chirping, and I felt on top of the world. However, as the day progressed, things took a turn for the worse. I got stuck in traffic on my way to work, which made me late for an important meeting. Then, to top it off, I spilled coffee all over my favorite shirt. Despite these setbacks, I managed to salvage the day by treating myself to a delicious dinner with friends in the evening. Overall, it was a rollercoaster of emotions, but I'm ending the day with a positive outlook."
# The fitted model is read from "sentiment_model.rds", resolved against the
# current working directory.
loaded_model <- readRDS("sentiment_model.rds")
predicted_sentiment <- predict_mean_sentiment(paragraph, loaded_model)
print(predicted_sentiment)