# cvtestshur.R
# Install packages if not already installed
if (!require("doParallel")) install.packages("doParallel")
if (!require("caret")) install.packages("caret")
if (!require("randomForest")) install.packages("randomForest") # needed for randomForest() below
# Load libraries
library(doParallel)
library(caret)
library(randomForest)
# Register parallel backend to use multiple cores
registerDoParallel(cores = detectCores() - 1) # use one less than the total number of cores
# Assuming 'train_data' is your full dataset already prepared
set.seed(123) # for reproducibility
train_index <- createDataPartition(train_data$adjusted_mean_sentiment, p = 0.8, list = FALSE)
train_set <- train_data[train_index, ]
test_set <- train_data[-train_index, ]
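# Optional sanity check (a sketch, not in the original script): confirm the
# 80/20 split produced the expected row counts before training.
print(nrow(train_set))
print(nrow(test_set))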
# Define the model formula: main effects plus two- and three-way interactions
sentiment_formula <- adjusted_mean_sentiment ~
  log_emotion_ratio +
  scaled_positive_ratio + scaled_negative_ratio +
  scaled_buffer_ratio + scaled_neutral_ratio +
  scaled_positive_ratio:scaled_negative_ratio +   # interaction between positive and negative
  scaled_buffer_ratio:scaled_neutral_ratio +      # interaction between buffer and neutral
  scaled_positive_ratio:scaled_negative_ratio:log_emotion_ratio + # three-way interaction
  decay_factor
# Train the model using your formula
model_rf <- randomForest(sentiment_formula, data = train_set, mtry = 3, ntree = 80)
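# Optional (a sketch, not part of the original analysis): inspect variable
# importance from the fitted forest to see which predictors drive the fit.
# importance() and varImpPlot() are standard randomForest functions.
importance(model_rf)                               # importance matrix per predictor
varImpPlot(model_rf, main = "Variable Importance") # dot plot of the same values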
# Predict on the test set
predictions <- predict(model_rf, test_set)
# Calculate performance metrics
test_rmse <- RMSE(predictions, test_set$adjusted_mean_sentiment)
test_r_squared <- cor(predictions, test_set$adjusted_mean_sentiment)^2
print(paste("Test RMSE: ", test_rmse))
print(paste("Test R-squared: ", test_r_squared))
# Set up 10-fold cross-validation with parallel processing
fitControl <- trainControl(
method = "cv",
number = 10,
savePredictions = "final",
allowParallel = TRUE # This enables parallel processing
)
# Train the model using cross-validation on the full dataset
# (each fold supplies its own hold-out)
model_cv <- train(
sentiment_formula,
data = train_data,
method = "rf",
trControl = fitControl,
tuneLength = 5,
preProcess = "scale" # only if scaling is needed
)
# Summarize the results at the selected tuning parameter. Taking min(RMSE)
# and max(Rsquared) across all mtry values can mix metrics from different
# models, so both are read from the bestTune row instead.
best_row <- model_cv$results[model_cv$results$mtry == model_cv$bestTune$mtry, ]
overall_rmse <- best_row$RMSE
overall_r_squared <- best_row$Rsquared
print(paste("Cross-Validated RMSE: ", overall_rmse))
print(paste("Cross-Validated R-squared: ", overall_r_squared))
# Plotting model performance across different tuning parameters
plot(model_cv)
print(model_cv)
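# Optional (a sketch, not in the original script): per-fold metrics are stored
# in model_cv$resample; their spread shows how stable the CV estimate is
# across folds.
print(summary(model_cv$resample$RMSE))
print(summary(model_cv$resample$Rsquared))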
####### Further metrics #########
# Assuming 'model_rf' is your trained model and 'test_set' is your test data
predictions <- predict(model_rf, test_set)
residuals <- test_set$adjusted_mean_sentiment - predictions
plot(predictions, residuals, main = "Residual vs. Predicted", xlab = "Predicted", ylab = "Residuals")
abline(h = 0, col = "red")
hist(residuals, breaks = 30, main = "Histogram of Residuals")
qqnorm(residuals)
qqline(residuals, col = "red")
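# Optional formal check (a sketch, not in the original script): the
# Shapiro-Wilk test complements the Q-Q plot. It requires 3-5000 observations
# and, like all normality tests, rejects easily at large sample sizes.
shapiro.test(residuals)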
# Plot residuals to check for autocorrelation visually
plot(residuals, type = 'l', main = "Residuals Plot", xlab = "Observation", ylab = "Residuals")
abline(h = 0, col = "red")
# Additionally, use ACF to examine autocorrelation
acf(residuals, main = "Autocorrelation Function")
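# Optional formal check (a sketch, not in the original script): the Ljung-Box
# test quantifies the autocorrelation the ACF plot displays. It is only
# meaningful if the test-set rows are in a natural (e.g. temporal) order.
Box.test(residuals, lag = 10, type = "Ljung-Box")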
# The mean residual should be close to zero for an unbiased model
mean_residuals <- mean(residuals)
print(paste("Mean of Residuals:", mean_residuals))