Sensitivity_Analysis.Rmd

---
title: "Discovery and inference of population structure in falciparum malaria"
author: "James Watson"
date: "05/11/2019"
output:
  html_document:
    df_print: paged
    keep_md: yes
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(cache = T, cache.comments = FALSE, 
                      include = TRUE, 
                      fig.width = 12, fig.height = 12,
                      fig.pos = 'H', 
                      dev = 'png', dpi = 300)

library(fastcluster)
library(RColorBrewer)
library(dendextend)
library(plyr)
library(gplots)
```

## Load meta data and pairwise distance matrices

```{r}
# meta data on 393 individuals in the TRAC1 study
load('RData/metadata.RData')

# 393 x 393 whole genome based 1-IBS distance matrix
load(file = 'RData/IBD_distance_matrix.RData')
IBD_WG = IBD_dist_matrix

# 393 x 393 whole genome based 1-IBD distance matrix
load(file = 'RData/IBS_distance_matrix.RData')
IBS_WG = IBS_dist_matrix

writeLines(sprintf('There are %s samples in total', nrow(metadata)))
```

Overview of the samples and of the mutations (Table 1 in the paper)
```{r}
table(metadata$Country)
table(metadata$k13Class, metadata$Country)
table(metadata$PLA1, metadata$Country)
table(metadata$crt_class, metadata$Country)
```

Make the -log_2 IBD distance matrix. We also look at other bases for the logarithmic transformation to check robustness of the halving assumption (there is an unknown amount of selfing going on so should be less than 0.5 per outcrossing event).

```{r}
sens_parameter = 2 # this can be changed as a sensitivity analysis to max value

IBD_neglog2_WG = -log(1-IBD_WG, base = 2)
max_val = max(IBD_neglog2_WG[!is.infinite(IBD_neglog2_WG)])
IBD_neglog2_WG[IBD_neglog2_WG>max_val] = max_val

IBD_neglog_WG_robust = -log(1-IBD_WG, base = 1.1) # approx 0.9 per outcrossing event
max_val = max(IBD_neglog_WG_robust[!is.infinite(IBD_neglog_WG_robust)])
IBD_neglog_WG_robust[IBD_neglog_WG_robust>max_val] = max_val

IBD_neglog_WG_robust2 = -log(1-IBD_WG, base = 2) 
max_val = max(IBD_neglog_WG_robust2[!is.infinite(IBD_neglog_WG_robust2)])
IBD_neglog_WG_robust2[IBD_neglog_WG_robust2>max_val] = max_val*sens_parameter
```


## Visualise whole genome pairwise distance matrices

This is Figure 2 in the paper

```{r IBS_versus_IBD}
par(las=1, mar = c(5,6,3,3), cex.axis=1.5, cex.lab=1.5, family = 'serif', bty='n')
layout(mat = matrix(c(1,2,3,4),nrow = 2,byrow = T))

h_IBS = hist(IBS_WG[upper.tri(IBS_WG)],breaks = 30,plot=F)
h_IBS$counts = log10(h_IBS$counts)
plot(h_IBS,main = '',ylab='',xlab='',yaxt='n', col='lightgrey', border=NA)
mtext(text='1-IBS', side = 1, line=4, cex=1.5)
mtext(text='Isolate pair count', side = 2, line=4, cex=1.5, las=3)
axis(2, at = 1:4, labels = expression(10, 10^2, 10^3, 10^4))
mtext(text='A', side = 3, adj = 0, line=0.5, cex=1.5)

h_IBD = hist(IBD_WG[upper.tri(IBD_WG)],breaks = 30,plot=F)
h_IBD$counts = log10(h_IBD$counts)
plot(h_IBD, xlab='',ylab = '', yaxt='n', col='lightgrey', border=NA,main='')
mtext(text='1-IBD', side = 1, line=4, cex=1.5)
mtext(text='Isolate pair count', side = 2, line=4, cex=1.5, las=3)
axis(2, at = 1:4, labels = expression(10, 10^2, 10^3, 10^4))
mtext(text='B', side = 3, adj = 0, line=0.5, cex=1.5)

h_IBD_log = hist(IBD_neglog2_WG[upper.tri(IBD_neglog2_WG)],breaks = 30,plot=F)
h_IBD_log$counts[h_IBD_log$counts>0] = log10(h_IBD_log$counts[h_IBD_log$counts>0])
h_IBD_log$counts[h_IBD_log$counts==0] = NA
not_na_ind = complete.cases(h_IBD_log$counts)
plot(h_IBD_log, main='',ylab='', yaxt='n', xlab = '',
     col='lightgrey', border=NA, ylim=range(h_IBD_log$counts,na.rm=T))
mtext(text=expression('-log'[2]*' IBD'), side = 1, line=4, cex=1.5)
mtext(text='Isolate pair count', side = 2, line=4, cex=1.5, las=3)
axis(2, at = 1:4, labels = expression(10, 10^2, 10^3, 10^4))
mtext(text='C', side = 3, adj = 0, line=0.5, cex=1.5)

plot(IBD_WG[lower.tri(IBD_WG)],  IBS_WG[lower.tri(IBS_WG)], pch='.',
     xlab = '', ylab = '', panel.first = grid())
mtext(text='1-IBD', side = 1, line=4, cex=1.5)
mtext(text='1-IBS', side = 2, line=4, cex=1.5, las=3)
mtext(text='D', side = 3, adj = 0, line=0.5, cex=1.5)
```

# PCoA and PCA

Add color column to meta data corresponding to kelch mutations
```{r}
sort(table(metadata$k13Class))

metadata$`Kelch` = metadata$k13Class
metadata$`Kelch`[!metadata$`Kelch` %in% c('R539T','Y493H','WT','C580Y','P553L','I543T')] = 'Other'
metadata$k13colors =
  mapvalues(metadata$Kelch,
            from = unique(metadata$Kelch),
            to=brewer.pal(name = 'Set1', n = length(unique(metadata$Kelch))))

metadata$Plasmepcolors =
  mapvalues(metadata$PLA1,
            from = unique(metadata$PLA1),
            to=brewer.pal(name = 'Dark2', n = 3)[1:2])
```

Compute PCoA on the distance matrices
```{r}
N = nrow(metadata)
K = 5
clas_scale_IBS = cmdscale(d = IBS_WG, k = N-1, eig = T, add = F)
clas_scale_IBD = cmdscale(d = IBD_WG, k = N-1, eig = T, add = F)
clas_scale_logIBD = cmdscale(d = IBD_neglog2_WG, k = N-1, eig = T, add = F)
clas_scale_logIBD_robust = cmdscale(d = IBD_neglog_WG_robust, k = N-1, eig = T, add = F)
clas_scale_logIBD_robust2 = cmdscale(d = IBD_neglog_WG_robust2, k = N-1, eig = T, add = F)
```

Compute PCA on the co-ancestry matrix.
The co-ancestry matrix was computed using the terminal command:

*fs cp -a 0 0 -in -iM -i 10 -n 1000 -j -g trac_cp.phase -r trac_cp.recombfile -t trac_cp.ids -o output_Ne_thousand*

We did a sensitivity analysis varying the value -n (0.1 to 1000).

```{r}
mypca<-function(x,zeromean=T,fdiag=T){
  ## do PCA on x %*% t(x) , normalising first if desired
  xN<-mypcanorm(x,zeromean,fdiag)
  tcov<-xN %*% t(xN)
  eigen(tcov)	
}


mypcanorm<-function(x,zeromean=T,fdiag=T){
  ## Normalise for PCA by setting the diagonal to be the average of the rows, then zero meaning by substracting the row mean (so the diagonal ends up as zero)
  if(fdiag){
    diag(x)<-rowSums(x)/(dim(x)[1]-1)
  }
  if(zeromean){
    return(x-rowMeans(x))
  }else{
    return(x)
  }
}
# Co-ancestry matrix with Ne = 1000
chunkfile<-"RData/output_Ne_thousand.chunkcounts.out" 
dataraw<-as.matrix(read.table(chunkfile,row.names=1,header=T,skip=0)) # read in the pairwise coincidence 
pcares<-mypca(dataraw)
```

```{r PCA_var_explained}
par(las=1, bty='n', cex.axis=2, cex.lab=2, mar=c(5,5,2,2))
plot(100*clas_scale_IBS$eig[1:K]/sum(clas_scale_IBS$eig), type='l',lwd=3,
     ylim=c(0,100), xlab = 'Component number', ylab='Variance explained', xaxt='n')
axis(1, 1:K)
lines(100*clas_scale_IBD$eig[1:K]/sum(clas_scale_IBD$eig), lty=2,lwd=3)
lines(100*clas_scale_logIBD$eig[1:K]/sum(clas_scale_logIBD$eig), lty=3,lwd=3)
legend('topright', legend = c('IBS','IBD','-log2 IBD'), lty = 1:3, lwd=3, cex=2, inset=0.03, bty='n')
```


Comparison of PCs 1-2 for the 4 distance/similarity matrices: this is Figure 2
```{r PCAcomparison_metrics}
par(mfcol=c(2,2), mar=c(5,5,3,5), las=1, cex.lab=1.5, cex.axis = 1.5, family = 'serif')
mycol = metadata$k13colors
mypch = as.numeric(metadata$PLA1 != 'WT')+1

#***** IBS *****
X = clas_scale_IBS$points
plot(-X[,1], X[,2], pch=mypch, bty='n', asp=1,
     col=mycol, xlab = 'PC1', ylab='PC2',main='')
mtext(text='A', side = 3, adj = 0, line=0.5, cex=1.5)

legend('topleft', legend = unique(metadata$Kelch) ,inset = 0.02, bg = 'white', 
       fill = brewer.pal(name = 'Set1', n = length(unique(metadata$Kelch))), 
       cex=1.3, title = expression(italic('Pfkelch13')))
legend('topright', legend = c('WT','Amplified'), title = expression(italic('Pfplasmepsin')),
       inset = 0.02, bg = 'white', cex=1.3, pch = 1:2)

#***** IBD *****
X = clas_scale_IBD$points
plot(-X[,1], X[,2], pch=mypch, bty='n',asp=1,
     col=mycol, xlab = 'PC1', ylab='PC2',main='')
mtext(text='B', side = 3, adj = 0, line=0.5, cex=1.5)

#***** -log_2 IBD *****
X = clas_scale_logIBD$points
plot(-X[,1], X[,2], pch=mypch, bty='n',asp=1,
     col=mycol, xlab = 'PC1', ylab='PC2',main='')
mtext(text='C', side = 3, adj = 0, line=0.5, cex=1.5)

#****** PCA on co-ancestry ***********
plot(-pcares$vectors[,1],pcares$vectors[,2],xlab = 'PC1', ylab='PC2',
     main='', pch=mypch, col = mycol, bty='n', asp=1)
mtext(text='D', side = 3, adj = 0, line=0.5, cex=1.5)

```


Does this differ with a different base in the log transformation? The answer is no.
```{r}
X = clas_scale_logIBD_robust$points
plot(-X[,1], X[,2], pch=mypch, bty='n',
     col=mycol, xlab = 'PC1', ylab='PC2',main='')
mtext(text='A', side = 3, adj = 0, line=0.5, cex=1.5)

X = clas_scale_logIBD_robust2$points
plot(-X[,1], X[,2], pch=mypch, bty='n',
     col=mycol, xlab = 'PC1', ylab='PC2',main='')
mtext(text='A', side = 3, adj = 0, line=0.5, cex=1.5)
```


How sensitive is the output of ChromoPainter to the "-n" argument (recombination scaling constant start-value (N_e; default=400000 divided by total number of haplotypes in <geno.filein>))?
We tried 0.1; 1 and 1000 (approx the default value for this many samples)

```{r}
chunkfile<-"RData/output_Ne_zeroone.chunkcounts.out" 
dataraw<-as.matrix(read.table(chunkfile,row.names=1,header=T,skip=0)) # read in the pairwise coincidence 
pcares1<-mypca(dataraw)

chunkfile<-"RData/output_Ne_one.chunkcounts.out" 
dataraw<-as.matrix(read.table(chunkfile,row.names=1,header=T,skip=0)) # read in the pairwise coincidence 
pcares2<-mypca(dataraw)

chunkfile<-"RData/output_Ne_thousand.chunkcounts.out" 
dataraw<-as.matrix(read.table(chunkfile,row.names=1,header=T,skip=0)) # read in the pairwise coincidence 
pcares3<-mypca(dataraw)

par(mfrow = c(1,3), las=1)

plot(-pcares1$vectors[,1],-pcares1$vectors[,2],xlab = 'PC1', ylab='PC2',
     main='N_e = 0.1', pch=mypch, col = mycol, bty='n', asp=1)
mtext(text='D', side = 3, adj = 0, line=0.5, cex=1.5)

plot(-pcares2$vectors[,1],-pcares2$vectors[,2],xlab = 'PC1', ylab='PC2',
     main='N_e = 1', pch=mypch, col = mycol, bty='n', asp=1)
mtext(text='D', side = 3, adj = 0, line=0.5, cex=1.5)

plot(-pcares3$vectors[,1],-pcares3$vectors[,2],xlab = 'PC1', ylab='PC2',
     main='N_e = 1000', pch=mypch, col = mycol, bty='n', asp=1)
mtext(text='D', side = 3, adj = 0, line=0.5, cex=1.5)

```


# Sensitivity to distance

We do hierachical agglomerative clustering on the three distance matrices using average linkage. 

```{r, include=T}
dist_matrices = list(IBD=as.dist(IBD_WG),
                     IBS=as.dist(IBS_WG),
                     IBD_neglog=as.dist(IBD_neglog2_WG))
link_methods = c('average')
dend_list = list()
i = 1
for(d in 1:length(dist_matrices)){
  for(m in link_methods){
    name_i = paste(names(dist_matrices)[d], m, sep='_')
    
    hh = fastcluster::hclust(d = dist_matrices[[d]], method = m)
    dend_list[[i]] = as.dendrogram(hh, hang = 0)
    labels(dend_list[[i]])= ''
    i = i+1
  }
}
```

```{r dendograms_distance_sensitivity}
mytitles = c('A','B','C')
my_color_bars = metadata$k13colors
mylabels = ''
xx = 18
dd = dend_list
par(mar=c(4,6,2,2), family = 'serif')
for(i in 1:length(dend_list)){

  dd = dend_list[[i]]
  plot(dd, main='', ylab='', yaxt='n')
  colored_bars(colors = my_color_bars, dend = dd,
               add=T, rowLabels = mylabels, cex=4.7)

  legend('topright', legend = unique(metadata$Kelch),
         fill = unique(metadata$k13colors),bty='n',inset=0.03,
         title = 'PfKelch', cex=2.4)

}
```


Aimee's suggestion for showing membership in the clusters as barplots
```{r barplot_cluster_stability_distance, fig.height=5, fig.width=9}
par(las=1, mfrow=c(1,3), family = 'serif', cex.axis=1.5, cex.lab=1.5)
graph_titles = c('A) 1-IBD', 'B) 1-IBS', expression('C) -log'[2]*' IBD'))

# These need to match up for the titles to be correct
print(graph_titles)
print(names(dist_matrices))

Cluster_number=c(3,6,9,12)

for(K_clusters in Cluster_number){
  mycols = brewer.pal(n = K_clusters, name = 'Paired')
  clusters_list = list()
  for(i in 1:length(dend_list)){
    dd = dend_list[[i]]
    clusters_list[[i]] = cutree(tree = dd, k = K_clusters)
    # This step just reverses the order of the numbering
    clusters_list[[i]] = mapvalues(x = clusters_list[[i]],
                                   from = as.numeric(names(sort(table(clusters_list[[i]])))),
                                   to = 1:K_clusters)
    print(table(clusters_list[[i]]))
  }
  
  # This is the `red-herring' barplot - one color per cluster
  barplot(table(clusters_list[[1]]), col = mycols, ylab = '',ylim=c(0,355))
  mtext(text= graph_titles[1], side = 3, adj = 0, line=0.5, cex=1.3)
  title(xlab = 'Cluster number',ylab = 'Number of isolates',cex.axis=1.5,cex.lab=1.5)
  
  for(Link_alg in 2:length(clusters_list)){
    cluster_prop = array(dim = c(K_clusters, K_clusters))
    # This loop computes the agreement between cluster membership
    # Complete agreement would result in non-zero entries for only row per column 
    # Doesn't have to be along the diagonal - but will be here due to ordering by size
    for(kk1 in 1:K_clusters) {
      for(kk2 in 1:K_clusters){
        ind1 = clusters_list[[1]] == kk1
        ind2 = clusters_list[[Link_alg]] == kk2
        cluster_prop[kk1,kk2] = sum(ind1 & ind2)
      }
    }
    barplot(cluster_prop, col = mycols, names.arg=1:K_clusters, ylab = '',xlab='',ylim=c(0,350))
    title(xlab = 'Cluster number', ylab = 'Number of isolates', cex.axis=1.5,cex.lab=1.5)
    mtext(text= graph_titles[Link_alg], side = 3, adj = 0, line=0.5, cex=1.3)
  }
}
```


# Sensitivity to linkage function

We do hierachical agglomerative clustering on -log 2 IBD using average linkage, complete linkage, single linkage and Ward's criterion.

```{r, include=F}
dist_matrices = list(IBD_neglog2_WG = as.dist(IBD_neglog2_WG))
link_methods = c('average', 'complete', 'single', 'ward.D')
dend_list = list()
HAC_list = list()
names_list = array(dim = length(dist_matrices)*length(link_methods))
i = 1
for(d in 1:length(dist_matrices)){
  for(m in link_methods){
    name_i = paste(names(dist_matrices)[d], m, sep='_')
    
    HAC_list[[i]] = fastcluster::hclust(d = dist_matrices[[d]], method = m)
    dend_list[[i]] = as.dendrogram(HAC_list[[i]], hang = 0)
    names_list[i] = name_i
    
    labels(dend_list[[i]])= ''
    i = i+1
  }
}

names(dend_list) = names_list
```


```{r dendograms_metrics_linkage}
mytitles = c('A','B','C','D')
my_color_bars = metadata$k13colors
mylabels = ''
xx = 18
dd = dend_list
par(mar=c(4,6,2,2), family = 'serif')
for(i in 1:length(dend_list)){
  dd = dend_list[[i]]
  plot(dd, main='', ylab='', yaxt='n')
  colored_bars(colors = my_color_bars, dend = dd,
               add=T, rowLabels = mylabels, cex=4.7)
  
  if(i ==4){
    legend('topright', legend = unique(metadata$Kelch),
           fill = unique(metadata$k13colors),bty='n',inset=0.03,
           title = 'PfKelch', cex=2.4)
  }
}
```


Random reorderings of dendrogram
```{r random_ordering_dendrogram, fig.height=8, fig.width=8}
par(mar = c(0,0,0,0))
dd1 = dend_list[[1]]
set.seed(476476)
dd2 = reorder(dend_list[[1]], wts = runif(N))
plot(dd1, main='', ylab='', yaxt='n')
legend('topleft', fill=unique(metadata$k13colors), legend = unique(metadata$k13Class), 
       title = 'Pfkelch', cex=1.25, inset=0.01)
colored_bars(colors = my_color_bars, dend = dd1,
               add=T, rowLabels = mylabels, cex=4.7)
plot(dd2, main='', ylab='', yaxt='n')
colored_bars(colors = my_color_bars, dend = dd2,
               add=T, rowLabels = mylabels, cex=4.7)
```


Aimee's suggestion for showing membership in the clusters as barplots
```{r barplot_cluster_stability, fig.height=5, fig.width=9}
mytitles = c('A','B','C','D')
mypch = 16*as.numeric(metadata$k13Class == 'C580Y') + 1
N = nrow(IBD_dist_matrix)
par(las=1, mfrow=c(1,4), family = 'serif', cex.axis=1.5,cex.lab=1.5)
graph_titles = c('Average linkage', 'Complete linkage', 'Single linkage', 'Ward criterion')
for(K_clusters in c(3,6,9,12)){
  clusters_list = list()
  for(i in 1:length(dend_list)){
    dd = dend_list[[i]]
    clusters_list[[i]] = cutree(tree = dd, k = K_clusters)
    clusters_list[[i]] = mapvalues(x = clusters_list[[i]], 
                                   from = as.numeric(names(sort(table(clusters_list[[i]])))), 
                                   to = 1:K_clusters)
    print(table(clusters_list[[i]]))
  }
  
  mycols = brewer.pal(n = K_clusters, name = 'Paired')
  
  
  barplot(table(clusters_list[[1]]), col = mycols, ylab = 'Number of isolates',ylim=c(0,375))
  mtext(text=paste(mytitles[1],') ', graph_titles[1], sep=''), 
        side = 3, adj = 0, line=0.5, cex=1.3)
  title(xlab = 'Cluster number')
  
  for(Link_alg in 2:length(clusters_list)){
    cluster_prop = array(dim = c(K_clusters, K_clusters))
    for(kk1 in 1:K_clusters) {
      for(kk2 in 1:K_clusters){
        ind1 = clusters_list[[1]] == kk1
        ind2 = clusters_list[[Link_alg]] == kk2
        cluster_prop[kk1,kk2] = sum(ind1 & ind2)
      }
    }
    barplot(cluster_prop, col = mycols, names.arg=1:K_clusters, ylab = 'Number of isolates',ylim=c(0,375))
    title(xlab = 'Cluster number')
    mtext(text=paste(mytitles[Link_alg],') ', graph_titles[Link_alg], sep=''), 
          side = 3, adj = 0, line=0.5, cex=1.3)
  }
}
```


# Heatmaps

```{r heatmaps_HAC, fig.height=3, fig.width=9}
par(mfrow=c(1,3), mar = c(1,2,3,1))

ind = order.dendrogram(as.dendrogram(fastcluster::hclust(d = as.dist(IBD_WG), method = 'average')))
image(z = t(IBD_WG[ind,ind]), x = 1:nrow(IBD_WG),y = 1:nrow(IBD_WG),
      breaks = seq(from = min(IBD_WG), to = max(IBD_WG), length.out = 10),
      col = rev(brewer.pal(n = 9, name = 'Purples')),
      xlab = '', ylab = '', xaxt='n', yaxt='n')
mtext(text='A) 1-IBD', side = 3, adj = 0, line=0.5, cex=1.3)

ind = order.dendrogram(as.dendrogram(fastcluster::hclust(d = as.dist(IBS_WG), method = 'average')))
image(z = t(IBS_WG[ind,ind]), x = 1:nrow(IBD_WG),y = 1:nrow(IBD_WG),
      breaks = seq(from = min(IBS_WG), to = max(IBS_WG), length.out = 10),
      col = rev(brewer.pal(n = 9, name = 'Purples')),
      xlab = '', ylab = '', xaxt='n', yaxt='n')
mtext(text='B) 1-IBS', side = 3, adj = 0, line=0.5, cex=1.3)

ind = order.dendrogram(as.dendrogram(fastcluster::hclust(d = as.dist(IBD_neglog2_WG), method = 'average')))
image(z = t(1-IBD_neglog2_WG[ind,ind]), x = 1:nrow(IBD_WG),y = 1:nrow(IBD_WG),
      breaks = seq(from = min(IBD_neglog2_WG), to = max(IBD_neglog2_WG), length.out = 10),
      col = rev(brewer.pal(n = 9, name = 'Purples')),
      xlab = '', ylab = '', xaxt='n', yaxt='n')
mtext(text= expression('C) -log'[2]*' IBD'), side = 3, adj = 0, line=0.5, cex=1.3)

```

```{r heatmaps_labelled, fig.height=5, fig.width=5}

ind = order.dendrogram(as.dendrogram(fastcluster::hclust(d = as.dist(IBD_WG), 
                                                         method = 'average')))
heatmap.2(x = IBD_WG[ind,ind], Rowv = F, Colv = F, dendrogram = "none",
          breaks = seq(from = min(IBD_WG), to = max(IBD_WG), length.out = 10),
          col = rev(brewer.pal(n = 9, name = 'Purples')), trace = 'none',
          ColSideColors=metadata$k13colors[ind], RowSideColors = metadata$Plasmepcolors[ind],
          key=F, labRow = NA, labCol = NA)

ind = order.dendrogram(as.dendrogram(fastcluster::hclust(d = as.dist(IBS_WG), 
                                                         method = 'average')))
heatmap.2(x = IBS_WG[ind,ind], Rowv = F, Colv = F, dendrogram = "none",
          breaks = seq(from = min(IBS_WG), to = max(IBS_WG), length.out = 10),
          col = rev(brewer.pal(n = 9, name = 'Purples')), trace = 'none',
          ColSideColors=metadata$k13colors[ind], RowSideColors = metadata$Plasmepcolors[ind],
          key=F, labRow = NA, labCol = NA)

ind = order.dendrogram(as.dendrogram(fastcluster::hclust(d = as.dist(IBD_neglog2_WG), 
                                                         method = 'average')))
heatmap.2(x = IBD_neglog2_WG[ind,ind], Rowv = F, Colv = F, dendrogram = "none",
          breaks = seq(from = min(IBD_neglog2_WG), to = 2, length.out = 10),
          col = rev(brewer.pal(n = 9, name = 'Purples')), trace = 'none',
          ColSideColors=metadata$k13colors[ind], RowSideColors = metadata$Plasmepcolors[ind],
          key=F, labRow = NA, labCol = NA)
```