-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsimulation-pydata.Rmd
287 lines (214 loc) · 15.5 KB
/
simulation-pydata.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
---
title: "simulation-pydata"
author: "fletcher & miguel"
date: "11/5/2020"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(ggplot2)
library(ggExtra)
library(viridis)
library(RColorBrewer)
library(vctrs)
library(scico)
```
```{r convdata FACET fig.height = 12, fig.width = 12}
convdf <- read.csv("results/testing2.csv")
#if it doesn't want to read it, put a space after the first "
View(convdf)
# Specify which path is chosen
convdf <- convdf %>% mutate(
Favored = colnames(convdf %>% select(Final.Ants.1, Final.Ants.2))[max.col(convdf %>% select(Final.Ants.1, Final.Ants.2),ties.method="first")]
)
convdf <- convdf %>% mutate(
Favored = as.integer(substr(Favored,nchar(Favored), nchar(Favored))),
Q.Favored = Q1 *(Favored ==1) + Q2 * (Favored ==2),
Q.Disfavored = Q1 *(Favored ==2) + Q2 * (Favored ==1),
D.Favored = D1 *(Favored ==1) + D2 * (Favored ==2),
D.Disfavored = D1 *(Favored ==2) + D2 * (Favored ==1),
Ants.Favored = Final.Ants.1 *(Favored ==1) + Final.Ants.2 * (Favored ==2),
Ants.Disfavored = Final.Ants.1 *(Favored ==2) + Final.Ants.2 * (Favored ==1),
)
convdf <- convdf %>% select (Convergence.Time, Q.Favored, Q.Disfavored, D.Favored, D.Disfavored, Ants.Favored, Ants.Disfavored, K, s) # CHANGE FOR PARAMS :3
round_any = function(x, accuracy, f=round){f(x/ accuracy) * accuracy}
convDifference <- convdf %>% mutate(
Dif.Q = Q.Favored- Q.Disfavored,
Dif.D = D.Favored- D.Disfavored,
Dif.Ants = Ants.Favored- Ants.Disfavored,
Rounded.Dif.Ants = round_any(Dif.Ants, 1000))
#View (convDifference)
p1<- ggplot(convDifference, aes(x= Dif.D, y = Dif.Q, color = Convergence.Time)) +
geom_point(size=1) + ggtitle("Tradeoffs between two resources") + ylab("Difference in Quality") +xlab("Difference in Distance") + facet_grid( K ~ s, labeller = label_both) # :3
# reg<-lm(dist ~ speed, data = cars)
# abline(reg, col="blue")
p2<- ggplot(convDifference %>% filter (Convergence.Time>= 40), aes(x= Dif.D, y = Dif.Q, color = Convergence.Time)) +
geom_point(size=1) + ggtitle("Runs with a Convergence Time of 40-50") + ylab("Difference in Quality") +xlab("Difference in Distance") + facet_grid( K ~ s, labeller = label_both) + geom_hline(yintercept=0)+ geom_vline(xintercept=0)
# add for axes:
# + geom_hline(yintercept=0)+ geom_vline(xintercept=0)
ggplot(convDifference, aes(x= Convergence.Time, y = Dif.Q, color = Dif.D)) +
geom_point(size=0.7, alpha = 0.7) + ggtitle("Convergence time and Difference in Quality") + ylab("Difference in Quality") +xlab("Convergence time") + facet_grid( K ~ s, labeller = label_both) + geom_hline(yintercept=0, colour = "black") + scale_color_scico(palette = 'vikO', limits = c(-1, 1) * max(abs(convDifference$Dif.D))) + theme_minimal()
ggplot(convDifference, aes(x= Convergence.Time, y = Dif.D, color = Dif.Q)) +
geom_point(size=0.7, alpha = 0.7) + ggtitle("Convergence time and Difference in Distance") + ylab("Difference in Distance") +xlab("Convergence time") + facet_grid( K ~ s, labeller = label_both) + geom_hline(yintercept=0, colour = "black") + scale_color_scico(palette = 'vikO', limits = c(-1, 1) * max(abs(convDifference$Dif.Q))) + theme_minimal()
```
```{r conv time data}
convdf <- read.csv("results/testing2.csv")
#if it doesn't want to read it, put a space after the first "
# Specify which path is chosen
convdf <- convdf %>% mutate(
Favored = colnames(convdf %>% select(Final.Ants.1, Final.Ants.2))[max.col(convdf %>% select(Final.Ants.1, Final.Ants.2),ties.method="first")]
)
convdf <- convdf %>% mutate(
Favored = as.integer(substr(Favored,nchar(Favored), nchar(Favored))),
Q.Favored = Q1 *(Favored ==1) + Q2 * (Favored ==2),
Q.Disfavored = Q1 *(Favored ==2) + Q2 * (Favored ==1),
D.Favored = D1 *(Favored ==1) + D2 * (Favored ==2),
D.Disfavored = D1 *(Favored ==2) + D2 * (Favored ==1),
Ants.Favored = Final.Ants.1 *(Favored ==1) + Final.Ants.2 * (Favored ==2),
Ants.Disfavored = Final.Ants.1 *(Favored ==2) + Final.Ants.2 * (Favored ==1),
)
convdf <- convdf %>% select (Convergence.Time, Q.Favored, Q.Disfavored, D.Favored, D.Disfavored, Ants.Favored, Ants.Disfavored)
#View(convdf)
round_any = function(x, accuracy, f=round){f(x/ accuracy) * accuracy}
convDifference <- convdf %>% mutate(
Dif.Q = Q.Favored- Q.Disfavored,
Dif.D = D.Favored- D.Disfavored,
Dif.Ants = Ants.Favored- Ants.Disfavored,
Rounded.Dif.Ants = round_any(Dif.Ants, 1000))
p1<- ggplot(convDifference, aes(x= Dif.D, y = Dif.Q, color = Convergence.Time)) +
geom_point(size=1) + ggtitle("Tradeoffs between two resources") + xlab("Dif in D btwn favored and disfavored source") +ylab("Dif in Q btwn favored and disfavored source")
p1m <- ggMarginal(p1, type="histogram", size=10)
#p1
p1m
# density plot
ggplot(convDifference, aes(x= Dif.D, y = Dif.Q) ) +
stat_density_2d(aes(fill = ..level..), geom = "polygon", colour="white")+ ggtitle("Density plot of differences of Q, D btwn two sources") + xlab("Dif in D btwn favored and disfavored source") +ylab("Dif in Q btwn favored and disfavored source")
p2 <- ggplot(convDifference, aes(x= Dif.D, y = Dif.Q, color = Dif.Ants)) +
geom_point(size=1) + ggtitle("Tradeoffs between two resources") + xlab("Dif in D btwn favored and disfavored source") +ylab("Dif in Q btwn favored and disfavored source")
#p2
p3<- ggplot(convDifference, aes(x= Dif.D, y = Dif.Q, color = Convergence.Time)) +
geom_point(size=1) + ggtitle("Tradeoffs: difference in ants rounded to nearest 1000") + xlab("Dif in D btwn favored and disfavored source") +ylab("Dif in Q btwn favored and disfavored source") + facet_wrap(~Rounded.Dif.Ants)
p3
p4<- ggplot(convDifference %>% filter (Convergence.Time>35), aes(x= Dif.D, y = Dif.Q, color = Convergence.Time)) +
geom_point(size=1) + ggtitle("Tradeoffs: difference in ants rounded to nearest 1000") + xlab("Dif in D btwn favored and disfavored source") +ylab("Dif in Q btwn favored and disfavored source") + facet_wrap(~Rounded.Dif.Ants)
#p4
```
```{r conv time visuals old}
convdf <- read.csv("2trail_conv_vals_QDST.csv")
convdf <- convdf %>% mutate(Dif.Q = abs(Q1 - Q2), Dif.D = abs(D1 - D2))
ggplot(convdf, aes(x= Dif.D, y = Dif.Q, color = Convergence.Time)) +
geom_point(size=1) + ggtitle("Convergence times in 2-resource simulations") +xlab("Difference in sources' distances") +ylab("Difference in sources' qualities")
head(convdf)
# tidyconv <- conv_df %>% pivot_longer(c('Q1', 'Q2'), names_to = "Source", values_to = "Quality")
# tidyconv <- tidyconv %>% pivot_longer(c('D1', 'D2'), names_to = "Source", values_to = "Distance")
# head(tidyconv)
#ggplot(conv_df, aes(x = ))
```
```{r Import csvs}
# Make sure that the CSVs have been saved to the same location as this RMD
# Otherwise, update the read.csv to include your path
# IMPORT PARAMETER CSV
# At the moment, not necessary
# Fletcher's been trying to debug issues with extracting variables from the csv
# param_df <- read.csv("params_gamma23_1000runs.csv")
# val_readout <- c(rbind(param_df$Param, param_df$Value))
# IMPORT WEIGHTED AVERAGES DF
# ❗️🐝 Change name to your CSV filename. Make sure it's in the same directory as this RMD❗❗️
wavg_df <- read.csv("results/2-sweep-test.csv")
#head(param_df)
#View(param_df)
```
## Sweeping One Parameter
```{r histogram ONE param}
require(gridExtra)
# Only run this chunk if your csv is from a 1-parameter sweep
# CSVs with data from 2-parameter sweeps might do funky stuff
# An attempt to caption the graphs with their parameter set
# capt0 <- paste(val_readout, collapse = ' ')
# capt1 <- substr(capt0, start=1, stop=(nchar(capt0))/2)
# capt2 <- substr(capt0, start=1+(nchar(capt0))/2, stop=nchar(capt0))
# capt0 <- paste(val_readout[0:10], collapse = ' ')
# capt1 <- paste(val_readout[11:20], collapse = ' ')
# capt2 <- paste(val_readout[21:30], collapse = ' ')
# capt3 <- paste(val_readout[31:40], collapse = ' ')
# capt4 <- paste(capt0, "\n", capt1, "\n", capt2, "\n", capt3)
wQprob_hist <- ggplot(data = wavg_df) +
geom_histogram(mapping = aes(x = WeightedQ, y = ..density..), binwidth = 1)+ facet_grid(rows = vars(Param.Values))+labs (title = "alpha")+
theme(plot.margin = unit(c(1,1,1,1), "cm"))
wDprob_hist <- ggplot(data = wavg_df) +
geom_histogram(mapping = aes(x = WeightedD, y = ..density..), binwidth = .01)+ facet_grid(rows = vars(Param.Values)) +labs (title = "alpha")+
theme(plot.margin = unit(c(1,1,1,1), "cm"))
#Another option: y = ..scaled.. (Scales to highest value is 1)
grid.arrange(wQprob_hist, wDprob_hist, ncol=2)#, bottom = capt4)
wQ_box<- ggplot(wavg_df, aes(x= as.factor(Param.Values), y=WeightedQ)) + geom_boxplot(fill="slateblue", alpha=0.2) + xlab ("alpha values")
wD_box<- ggplot(wavg_df, aes(x= as.factor(Param.Values), y=WeightedD)) + geom_boxplot(fill="slateblue", alpha=0.2)+ xlab ("alpha values")
grid.arrange(wQ_box, wD_box, ncol=2)
```
## Sweeping Two Parameters
This chunk creates a heatmap showing the average weighted quality and average weighted distance over all of the trials run with that set of parameters. We wrote this with the goal of figuring out what the relationship between gamma 2 and gamma 3 is. Knowing that will help us decide how to tether them when we run larger parameter sweeps. So far, we haven't seen any identifyable patterns.
```{r heatmap TWO params, fig.height = 2.5, fig.width = 6}
library(magrittr)
wavg_df <- wavg_df %>% filter(ParamA.Values > 0 & ParamB.Values >0)
#View(wavg_df)
wavg_means <- wavg_df %>% group_by(ParamA.Values, ParamB.Values) %>%
summarize(WeightedQ = mean(WeightedQ),
WeightedD = mean(WeightedD))
#View(wavg_means)
wQ_heat <- ggplot(wavg_means, aes(ParamA.Values, ParamB.Values, fill= WeightedQ)) +
geom_tile() + xlab("Eta 1") + ylab("Eta 2")
wD_heat <- ggplot(wavg_means, aes(ParamA.Values, ParamB.Values, fill= WeightedD)) +
geom_tile() + xlab("Eta 1") + ylab("Eta 2")
require(gridExtra)
grid.arrange(wQ_heat, wD_heat, ncol=2)
```
```{r best fit correlogram, fig.height = 7, fig.width = 7}
ggplot(wavg_df, aes(x=WeightedQ, y = WeightedD)) + facet_grid(rows = vars(ParamB.Values), cols = vars(ParamA.Values)) + geom_smooth(method=lm, se=TRUE) + xlab ("X Axis: Weighted Average of Quality; Facet: Param 1 values") + ylab ("Y Axis: Weighted Average of Distance; Facet: Param 2 values")
# best fit density plot
# has blue plot of weighted avg of quality and distance, color depends on how many points there
# with red best fit line
ggplot(wavg_df, aes(x=WeightedQ, y = WeightedD)) + facet_grid(rows = vars(ParamB.Values), cols = vars(ParamA.Values)) + xlab ("X Axis: Weighted Average of Quality; Facet: Param 1 values") + ylab ("Y Axis: Weighted Average of Distance; Facet: Param 2 values") +stat_density_2d(aes(fill = ..density..), geom = "raster", contour = FALSE) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
theme(
legend.position='none'
) + geom_smooth(method=lm, color = "red", se=FALSE)
```
These are some histograms for 2-parameter sweeps looking at the weighted averages of quality and distance over a set of one parameter's values. Note that when we look at a particular parameter 1 value here, we're combining the data from various parameter 2 values. It definitely isn't an ideal way of visualizing this info, but I wanted a way of seeing how "strong" the differences are over a set of parameter values, regardless of where in the range we provided the other param we were sweeping was. That could help us see if one parameter is able to "overpower" another and redefine the foraging behavior of the ants without the other parameter's help.
We sometimes see a pile-up at zero because beyond a certain distance (determined by gamma 1), the ants don't venture out far enough to discover any of the resources. So, the average quality and distance of the resources is zero.
```{r histograms, fig.height = 7, fig.width = 7}
# ⚠️ Note that these plots are summed over the values of the other parameter
# To undo this, use %>% filter(ParamB.Values == 1)) or equivalent
# for some value of the parameter that was tested
# PARAM 1 HISTOGRAMS
wQprob_hist1 <- ggplot(data = wavg_df) +
geom_histogram(mapping = aes(x = WeightedQ, y = ..density..), binwidth = 1)+ facet_grid(rows = vars(ParamA.Values))+labs (title = "Parameter 1 Quality Weighted Avg")
wDprob_hist1 <- ggplot(data = wavg_df) +
geom_histogram(mapping = aes(x = WeightedD, y = ..density..), binwidth = .01)+ facet_grid(rows = vars(ParamA.Values)) +labs (title = "Parameter 1 Distance Weighted Avg")
# PARAM 2 HISTOGRAMS
wQprob_hist2 <- ggplot(data = wavg_df) +
geom_histogram(mapping = aes(x = WeightedQ, y = ..density..), binwidth = 1)+ facet_grid(rows = vars(ParamB.Values))+labs (title = "Parameter 2 Quality Weighted Avg")
wDprob_hist2 <- ggplot(data = wavg_df) +
geom_histogram(mapping = aes(x = WeightedD, y = ..density..), binwidth = .01)+ facet_grid(rows = vars(ParamB.Values)) +labs (title = "Parameter 2 Distance Weighted Avg")
grid.arrange(wQprob_hist1, wDprob_hist1, wQprob_hist2, wDprob_hist2, ncol=2)
#⚠️ Note that these plots are summed over the values of the other parameter
#PARAM 1 BOX PLOTS
wQ_box1 <- ggplot(wavg_df, aes(x= as.factor(ParamA.Values), y=WeightedQ)) + geom_boxplot(fill="slateblue", alpha=0.2) + xlab ("Param 1")
wD_box1 <- ggplot(wavg_df, aes(x= as.factor(ParamA.Values), y=WeightedD)) + geom_boxplot(fill="slateblue", alpha=0.2)+ xlab ("Param 1")
wQ_box2 <- ggplot(wavg_df, aes(x= as.factor(ParamB.Values), y=WeightedQ)) + geom_boxplot(fill="slateblue", alpha=0.2) + xlab ("Param 2")
wD_box2 <- ggplot(wavg_df, aes(x= as.factor(ParamB.Values), y=WeightedD)) + geom_boxplot(fill="slateblue", alpha=0.2)+ xlab ("Param 2")
grid.arrange(wQ_box1, wD_box1,wQ_box2, wD_box2, ncol=2, nrow=2)
```
The difference between averages (not an "average difference"- the wording is a little confusing) represented here is the weighted average of quality (distance) minus the average quality (distance) of all of the food sources. In other words, the difference between averages is telling us how different the ants' "average" chosen resource is from the "average" resource in the environment.
A positive quality difference tells us that the ants are selecting higher quality resources than the average the environment provides. A positive distance difference tells us that the ants are selecting resources that are farther than average from the nest.
The below graphs show histograms of the difference of averages for quality and distance. They are faceted by the values of the two parameters we're sweeping. Each column is a different parameter 1 value and each row is a different parameter 2 value. The red vertical line on each graph represents zero.
```{r difference btwn averages, fig.height = 7, fig.width = 7}
# positive difference for Q- picking better quality than environment's average
# positive difference for D- picking sources farther than environment's average
# Python: dif_btwn_avgs_D[w] = weight_avg_D[w] - avg_D[w] # negative difference- picking better than environment
difQplot <- ggplot(data = wavg_df) +
geom_histogram(mapping = aes(x = Dif.Avgs.Q, y = ..density..), binwidth = 1)+ facet_grid(rows = vars(ParamA.Values), cols = vars(ParamB.Values))+labs (title = "Difference = Weighted Avg Quality — Avg Quality") + xlab("Eta 1")+ ylab("Eta 2") + geom_vline(xintercept = 0, color = "red", size=0.5)
difDplot <- ggplot(data = wavg_df) +
geom_histogram(mapping = aes(x = Dif.Avgs.D, y = ..density..), binwidth = .02)+ facet_grid(rows = vars(ParamA.Values), cols = vars(ParamB.Values))+labs (title = "Difference = Weighted Avg Dist - Avg Dist") + xlab("Eta 1")+ ylab("Eta 2") + geom_vline(xintercept = 0, color = "red", size=0.5)
grid.arrange(difQplot, difDplot, ncol=2)
```