simulation-pydata.Rmd

---
title: "simulation-pydata"
author: "fletcher & miguel"
date: "11/5/2020"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(ggplot2)
library(ggExtra)
library(viridis)
library(RColorBrewer)
library(vctrs)
library(scico)
```

```{r convdata FACET fig.height = 12, fig.width = 12}
convdf <- read.csv("results/testing2.csv")
#if it doesn't want to read it, put a space after the first "
View(convdf)
# Specify which path is chosen
convdf <- convdf %>% mutate(
  Favored = colnames(convdf %>% select(Final.Ants.1, Final.Ants.2))[max.col(convdf %>% select(Final.Ants.1, Final.Ants.2),ties.method="first")]
)
convdf <- convdf %>% mutate(
  Favored = as.integer(substr(Favored,nchar(Favored), nchar(Favored))), 
  Q.Favored = Q1 *(Favored ==1) + Q2 * (Favored ==2), 
  Q.Disfavored = Q1 *(Favored ==2) + Q2 * (Favored ==1), 
  D.Favored = D1 *(Favored ==1) + D2 * (Favored ==2), 
  D.Disfavored = D1 *(Favored ==2) + D2 * (Favored ==1), 
  Ants.Favored = Final.Ants.1 *(Favored ==1) + Final.Ants.2 * (Favored ==2), 
  Ants.Disfavored = Final.Ants.1 *(Favored ==2) + Final.Ants.2 * (Favored ==1), 
  )

convdf <- convdf %>% select (Convergence.Time, Q.Favored, Q.Disfavored, D.Favored, D.Disfavored, Ants.Favored, Ants.Disfavored, K, s) # CHANGE FOR PARAMS :3


round_any = function(x, accuracy, f=round){f(x/ accuracy) * accuracy}

convDifference <- convdf %>% mutate(
  Dif.Q = Q.Favored- Q.Disfavored, 
  Dif.D = D.Favored- D.Disfavored,
  Dif.Ants = Ants.Favored- Ants.Disfavored,
  Rounded.Dif.Ants = round_any(Dif.Ants, 1000))

#View (convDifference)

p1<- ggplot(convDifference, aes(x= Dif.D, y = Dif.Q, color = Convergence.Time)) + 
    geom_point(size=1) + ggtitle("Tradeoffs between two resources") + ylab("Difference in Quality") +xlab("Difference in Distance") + facet_grid( K ~ s, labeller = label_both)  # :3

# reg<-lm(dist ~ speed, data = cars)
# abline(reg, col="blue")


p2<- ggplot(convDifference %>% filter (Convergence.Time>= 40), aes(x= Dif.D, y = Dif.Q, color = Convergence.Time)) + 
    geom_point(size=1) + ggtitle("Runs with a Convergence Time of 40-50") + ylab("Difference in Quality") +xlab("Difference in Distance") + facet_grid( K ~ s, labeller = label_both) + geom_hline(yintercept=0)+ geom_vline(xintercept=0)

# add for axes:
# + geom_hline(yintercept=0)+ geom_vline(xintercept=0)

ggplot(convDifference, aes(x= Convergence.Time, y = Dif.Q, color = Dif.D)) + 
     geom_point(size=0.7, alpha = 0.7) + ggtitle("Convergence time and Difference in Quality") + ylab("Difference in Quality") +xlab("Convergence time") + facet_grid( K ~ s, labeller = label_both) + geom_hline(yintercept=0, colour = "black") + scale_color_scico(palette = 'vikO', limits = c(-1, 1) * max(abs(convDifference$Dif.D))) + theme_minimal()

ggplot(convDifference, aes(x= Convergence.Time, y = Dif.D, color = Dif.Q)) + 
     geom_point(size=0.7, alpha = 0.7) + ggtitle("Convergence time and Difference in Distance") + ylab("Difference in Distance") +xlab("Convergence time") + facet_grid( K ~ s, labeller = label_both) + geom_hline(yintercept=0, colour = "black") + scale_color_scico(palette = 'vikO', limits = c(-1, 1) * max(abs(convDifference$Dif.Q))) + theme_minimal()
```


```{r conv time data}
convdf <- read.csv("results/testing2.csv")
#if it doesn't want to read it, put a space after the first "

# Specify which path is chosen
convdf <- convdf %>% mutate(
  Favored = colnames(convdf %>% select(Final.Ants.1, Final.Ants.2))[max.col(convdf %>% select(Final.Ants.1, Final.Ants.2),ties.method="first")]
)
convdf <- convdf %>% mutate(
  Favored = as.integer(substr(Favored,nchar(Favored), nchar(Favored))), 
  Q.Favored = Q1 *(Favored ==1) + Q2 * (Favored ==2), 
  Q.Disfavored = Q1 *(Favored ==2) + Q2 * (Favored ==1), 
  D.Favored = D1 *(Favored ==1) + D2 * (Favored ==2), 
  D.Disfavored = D1 *(Favored ==2) + D2 * (Favored ==1), 
  Ants.Favored = Final.Ants.1 *(Favored ==1) + Final.Ants.2 * (Favored ==2), 
  Ants.Disfavored = Final.Ants.1 *(Favored ==2) + Final.Ants.2 * (Favored ==1), 
  )

convdf <- convdf %>% select (Convergence.Time, Q.Favored, Q.Disfavored, D.Favored, D.Disfavored, Ants.Favored, Ants.Disfavored)
#View(convdf)

round_any = function(x, accuracy, f=round){f(x/ accuracy) * accuracy}

convDifference <- convdf %>% mutate(
  Dif.Q = Q.Favored- Q.Disfavored, 
  Dif.D = D.Favored- D.Disfavored,
  Dif.Ants = Ants.Favored- Ants.Disfavored,
  Rounded.Dif.Ants = round_any(Dif.Ants, 1000))
p1<- ggplot(convDifference, aes(x= Dif.D, y = Dif.Q, color = Convergence.Time)) + 
    geom_point(size=1) + ggtitle("Tradeoffs between two resources") + xlab("Dif in D btwn favored and disfavored source") +ylab("Dif in Q btwn favored and disfavored source")
p1m <- ggMarginal(p1, type="histogram", size=10)
#p1
p1m

# density plot
ggplot(convDifference, aes(x= Dif.D, y = Dif.Q) ) +
 stat_density_2d(aes(fill = ..level..), geom = "polygon", colour="white")+ ggtitle("Density plot of differences of Q, D btwn two sources") + xlab("Dif in D btwn favored and disfavored source") +ylab("Dif in Q btwn favored and disfavored source")

p2 <- ggplot(convDifference, aes(x= Dif.D, y = Dif.Q, color = Dif.Ants)) + 
    geom_point(size=1) + ggtitle("Tradeoffs between two resources") + xlab("Dif in D btwn favored and disfavored source") +ylab("Dif in Q btwn favored and disfavored source")
#p2

p3<- ggplot(convDifference, aes(x= Dif.D, y = Dif.Q, color = Convergence.Time)) + 
    geom_point(size=1) + ggtitle("Tradeoffs: difference in ants rounded to nearest 1000") + xlab("Dif in D btwn favored and disfavored source") +ylab("Dif in Q btwn favored and disfavored source") + facet_wrap(~Rounded.Dif.Ants)
p3

p4<- ggplot(convDifference %>% filter (Convergence.Time>35), aes(x= Dif.D, y = Dif.Q, color = Convergence.Time)) + 
    geom_point(size=1) + ggtitle("Tradeoffs: difference in ants rounded to nearest 1000") + xlab("Dif in D btwn favored and disfavored source") +ylab("Dif in Q btwn favored and disfavored source") + facet_wrap(~Rounded.Dif.Ants)
#p4
```

```{r conv time visuals old}
convdf <- read.csv("2trail_conv_vals_QDST.csv")

convdf <- convdf %>% mutate(Dif.Q = abs(Q1 - Q2), Dif.D = abs(D1 - D2))
ggplot(convdf, aes(x= Dif.D, y = Dif.Q, color = Convergence.Time)) + 
    geom_point(size=1) + ggtitle("Convergence times in 2-resource simulations") +xlab("Difference in sources' distances") +ylab("Difference in sources' qualities")
head(convdf)
# tidyconv <- conv_df %>% pivot_longer(c('Q1', 'Q2'), names_to = "Source", values_to = "Quality")
# tidyconv <- tidyconv %>% pivot_longer(c('D1', 'D2'), names_to = "Source", values_to = "Distance")
# head(tidyconv)
#ggplot(conv_df, aes(x = ))

```


```{r Import csvs}
# Make sure that the CSVs have been saved to the same location as this RMD
# Otherwise, update the read.csv to include your path

# IMPORT PARAMETER CSV
# At the moment, not necessary
# Fletcher's been trying to debug issues with extracting variables from the csv
# param_df <- read.csv("params_gamma23_1000runs.csv")
# val_readout <- c(rbind(param_df$Param, param_df$Value))

# IMPORT WEIGHTED AVERAGES DF
# ❗️🐝 Change name to your CSV filename. Make sure it's in the same directory as this RMD❗❗️
wavg_df <- read.csv("results/2-sweep-test.csv")

#head(param_df)
#View(param_df)
```

## Sweeping One Parameter

```{r histogram ONE param}
require(gridExtra)

# Only run this chunk if your csv is from a 1-parameter sweep
# CSVs with data from 2-parameter sweeps might do funky stuff

# An attempt to caption the graphs with their parameter set
# capt0 <- paste(val_readout, collapse = ' ')
# capt1 <- substr(capt0, start=1, stop=(nchar(capt0))/2)
# capt2 <- substr(capt0, start=1+(nchar(capt0))/2, stop=nchar(capt0))
# capt0 <- paste(val_readout[0:10], collapse = ' ')
# capt1 <- paste(val_readout[11:20], collapse = ' ')
# capt2 <- paste(val_readout[21:30], collapse = ' ')
# capt3 <- paste(val_readout[31:40], collapse = ' ')
# capt4 <- paste(capt0, "\n", capt1, "\n", capt2, "\n", capt3)

wQprob_hist <- ggplot(data = wavg_df) +
  geom_histogram(mapping = aes(x = WeightedQ, y = ..density..), binwidth = 1)+ facet_grid(rows = vars(Param.Values))+labs (title = "alpha")+
    theme(plot.margin = unit(c(1,1,1,1), "cm"))

wDprob_hist <- ggplot(data = wavg_df) +
  geom_histogram(mapping = aes(x = WeightedD, y = ..density..), binwidth = .01)+ facet_grid(rows = vars(Param.Values)) +labs (title = "alpha")+
    theme(plot.margin = unit(c(1,1,1,1), "cm"))

#Another option: y = ..scaled.. (Scales to highest value is 1)

grid.arrange(wQprob_hist, wDprob_hist, ncol=2)#, bottom = capt4)

wQ_box<- ggplot(wavg_df, aes(x= as.factor(Param.Values), y=WeightedQ)) + geom_boxplot(fill="slateblue", alpha=0.2) + xlab ("alpha values")

wD_box<- ggplot(wavg_df, aes(x= as.factor(Param.Values), y=WeightedD)) + geom_boxplot(fill="slateblue", alpha=0.2)+ xlab ("alpha values")

grid.arrange(wQ_box, wD_box, ncol=2)

```

## Sweeping Two Parameters

This chunk creates a heatmap showing the average weighted quality and average weighted distance over all of the trials run with that set of parameters. We wrote this with the goal of figuring out what the relationship between gamma 2 and gamma 3 is. Knowing that will help us decide how to tether them when we run larger parameter sweeps. So far, we haven't seen any identifyable patterns.

```{r heatmap TWO params, fig.height = 2.5, fig.width = 6}
library(magrittr)
wavg_df <- wavg_df %>% filter(ParamA.Values > 0 & ParamB.Values >0)

#View(wavg_df)
wavg_means <- wavg_df %>% group_by(ParamA.Values, ParamB.Values) %>%
  summarize(WeightedQ = mean(WeightedQ),
            WeightedD = mean(WeightedD))
#View(wavg_means)

wQ_heat <- ggplot(wavg_means, aes(ParamA.Values, ParamB.Values, fill= WeightedQ)) +
  geom_tile() + xlab("Eta 1") + ylab("Eta 2")

wD_heat <- ggplot(wavg_means, aes(ParamA.Values, ParamB.Values, fill= WeightedD)) +
  geom_tile() + xlab("Eta 1") + ylab("Eta 2")

require(gridExtra)
grid.arrange(wQ_heat, wD_heat, ncol=2)
```

```{r best fit correlogram, fig.height = 7, fig.width = 7}
ggplot(wavg_df, aes(x=WeightedQ, y = WeightedD)) + facet_grid(rows = vars(ParamB.Values), cols = vars(ParamA.Values)) + geom_smooth(method=lm, se=TRUE) + xlab ("X Axis: Weighted Average of Quality; Facet: Param 1 values") + ylab ("Y Axis: Weighted Average of Distance; Facet: Param 2 values")

# best fit density plot
# has blue plot of weighted avg of quality and distance, color depends on how many points there
# with red best fit line
ggplot(wavg_df, aes(x=WeightedQ, y = WeightedD)) + facet_grid(rows = vars(ParamB.Values), cols = vars(ParamA.Values)) + xlab ("X Axis: Weighted Average of Quality; Facet: Param 1 values") + ylab ("Y Axis: Weighted Average of Distance; Facet: Param 2 values") +stat_density_2d(aes(fill = ..density..), geom = "raster", contour = FALSE) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  theme(
    legend.position='none'
  )  + geom_smooth(method=lm, color = "red",  se=FALSE)
```

These are some histograms for 2-parameter sweeps looking at the weighted averages of quality and distance over a set of one parameter's values. Note that when we look at a particular parameter 1 value here, we're combining the data from various parameter 2 values. It definitely isn't an ideal way of visualizing this info, but I wanted a way of seeing how "strong" the differences are over a set of parameter values, regardless of where in the range we provided the other param we were sweeping was. That could help us see if one parameter is able to "overpower" another and redefine the foraging behavior of the ants without the other parameter's help.
We sometimes see a pile-up at zero because beyond a certain distance (determined by gamma 1), the ants don't venture out far enough to discover any of the resources. So, the average quality and distance of the resources is zero.
```{r histograms, fig.height = 7, fig.width = 7}
# ⚠️ Note that these plots are summed over the values of the other parameter
# To undo this, use %>% filter(ParamB.Values == 1)) or equivalent
# for some value of the parameter that was tested

# PARAM 1 HISTOGRAMS
wQprob_hist1 <- ggplot(data = wavg_df) +
  geom_histogram(mapping = aes(x = WeightedQ, y = ..density..), binwidth = 1)+ facet_grid(rows = vars(ParamA.Values))+labs (title = "Parameter 1 Quality Weighted Avg")

wDprob_hist1 <- ggplot(data = wavg_df) +
  geom_histogram(mapping = aes(x = WeightedD, y = ..density..), binwidth = .01)+ facet_grid(rows = vars(ParamA.Values)) +labs (title = "Parameter 1 Distance Weighted Avg")

# PARAM 2 HISTOGRAMS
wQprob_hist2 <- ggplot(data = wavg_df) +
  geom_histogram(mapping = aes(x = WeightedQ, y = ..density..), binwidth = 1)+ facet_grid(rows = vars(ParamB.Values))+labs (title = "Parameter 2 Quality Weighted Avg")

wDprob_hist2 <- ggplot(data = wavg_df) +
  geom_histogram(mapping = aes(x = WeightedD, y = ..density..), binwidth = .01)+ facet_grid(rows = vars(ParamB.Values)) +labs (title = "Parameter 2 Distance Weighted Avg")

grid.arrange(wQprob_hist1, wDprob_hist1, wQprob_hist2, wDprob_hist2, ncol=2)

#⚠️ Note that these plots are summed over the values of the other parameter

#PARAM 1 BOX PLOTS
wQ_box1 <- ggplot(wavg_df, aes(x= as.factor(ParamA.Values), y=WeightedQ)) + geom_boxplot(fill="slateblue", alpha=0.2) + xlab ("Param 1")

wD_box1 <- ggplot(wavg_df, aes(x= as.factor(ParamA.Values), y=WeightedD)) + geom_boxplot(fill="slateblue", alpha=0.2)+ xlab ("Param 1")

wQ_box2 <- ggplot(wavg_df, aes(x= as.factor(ParamB.Values), y=WeightedQ)) + geom_boxplot(fill="slateblue", alpha=0.2) + xlab ("Param 2")

wD_box2 <- ggplot(wavg_df, aes(x= as.factor(ParamB.Values), y=WeightedD)) + geom_boxplot(fill="slateblue", alpha=0.2)+ xlab ("Param 2")

grid.arrange(wQ_box1, wD_box1,wQ_box2, wD_box2, ncol=2, nrow=2)
```

The difference between averages (not an "average difference"- the wording is a little confusing) represented here is the weighted average of quality (distance) minus the average quality (distance) of all of the food sources. In other words, the difference between averages is telling us how different the ants' "average" chosen resource is from the "average" resource in the environment.

A positive quality difference tells us that the ants are selecting higher quality resources than the average the environment provides. A positive distance difference tells us that the ants are selecting resources that are farther than average from the nest.

The below graphs show histograms of the difference of averages for quality and distance. They are faceted by the values of the two parameters we're sweeping. Each column is a different parameter 1 value and each row is a different parameter 2 value. The red vertical line on each graph represents zero.

```{r difference btwn averages, fig.height = 7, fig.width = 7}
# positive difference for Q- picking better quality than environment's average
# positive difference for D- picking sources farther than environment's average
# Python: dif_btwn_avgs_D[w] = weight_avg_D[w] - avg_D[w]   # negative difference- picking better than environment

difQplot <- ggplot(data = wavg_df) +
  geom_histogram(mapping = aes(x = Dif.Avgs.Q, y = ..density..), binwidth = 1)+ facet_grid(rows = vars(ParamA.Values), cols = vars(ParamB.Values))+labs (title = "Difference = Weighted Avg Quality — Avg Quality") + xlab("Eta 1")+ ylab("Eta 2") + geom_vline(xintercept = 0, color = "red", size=0.5)

difDplot <- ggplot(data = wavg_df) +
  geom_histogram(mapping = aes(x = Dif.Avgs.D, y = ..density..), binwidth = .02)+ facet_grid(rows = vars(ParamA.Values), cols = vars(ParamB.Values))+labs (title = "Difference = Weighted Avg Dist - Avg Dist") + xlab("Eta 1")+ ylab("Eta 2") + geom_vline(xintercept = 0, color = "red", size=0.5)

grid.arrange(difQplot, difDplot, ncol=2)

```