Comparisons_to_existing_methods.Rmd

---
title: "Comparing DIOgene to other existing implementations for integrative GRN inference"
output: 
  html_document:
    df_print: paged
    toc: true
    toc_float: true
    toc_depth: 2
    code_folding: hide
---


```{r setup, include=FALSE}
# Global chunk options: echo the code, hide warnings and messages, and use a
# default figure width of 10. `FALSE` is spelled out because `F` is an ordinary
# variable that can be reassigned.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, fig.width = 10)

source('inference_functions/weightedRF.R')
source('inference_functions/weightedLASSO.R')
source("inference_functions/mLASSO_StARS.R")
source('inference_functions/evaluateNetwork.R')
source('inference_functions/data_integration_optimization.R')

library(ggplot2)
library(tidyverse)
library(ggpubr)
library(patchwork)
library(ggVennDiagram)
library(ComplexHeatmap)
library(clusterProfiler)
library(circlize)
library(MESS)
```

Data loading:

```{r}
# Expression data: the loaded file provides `input_data`, read just below.
load('rdata/inference_input_N_response_varala.rdata')

genes <- input_data$grouped_genes
length(genes)

tfs <- input_data$grouped_regressors
length(tfs)

counts <- input_data$counts
dim(counts)
# (kept for reference) x <- t(counts)[,genes]

# Prior data: the loaded file provides `pwm_occurrence`; the prior is later
# integrated for three values of alpha (0, 0.5, 1).
load("rdata/pwm_occurrences_N_response_varala.rdata")
dim(pwm_occurrence)

# Settings shared by the chunks below.
alphas <- c(0, 0.5, 1)
nCores <- 45
N_rep <- 3
```

# Non linear case: comparison to iRafNet

Differences between iRafNet and weightedRF:

+ The importance metric is node purity in iRafNet, versus the MSE increase after permutation on out-of-bag (OOB) samples for weightedRF
+ iRafNet has to scale the features to get appropriate feature importance


```{r, eval = F}
# original iRafNet, simply with the MSE having been added as an output
# library(iRafNet)

# For every alpha and replicate, infer one network with each importance metric,
# evaluate both against DAP-Seq and keep the annotated precision/recall tables.
# The per-run tables are collected in a list and bound once at the end rather
# than growing `results` with rbind() at each iteration.
runs <- list()
for (alpha in alphas) {
  # `i_rep` (not `rep`) so that base::rep() is not masked by the loop variable
  for (i_rep in seq_len(N_rep)) {
    # computing models for iRafNet (node purity importance) and weightedRF
    importances_iRafNet <- weightedRF_inference(counts, genes, tfs, alpha = alpha,
                                                nTrees = 2000,
                                                importance = "IncNodePurity",
                                                pwm_occurrence = pwm_occurrence,
                                                nCores = nCores)

    importances_wRF <- weightedRF_inference(counts, genes, tfs, alpha = alpha,
                                            nTrees = 2000,
                                            pwm_occurrence = pwm_occurrence,
                                            nCores = nCores)

    # building sparse GRNs and evaluating them against DAP-Seq;
    # the "mse" row of the importance matrix is summarised by its median
    pr_curve_iRafNet <- evaluate_fully_connected(importances_iRafNet[tfs, ],
                                                 pwm_occurrence = pwm_occurrence,
                                                 input_genes = genes, input_tfs = tfs,
                                                 validation = c("DAPSeq"),
                                                 nCores = nCores) %>%
      mutate(alpha = alpha, method = "iRafNet", rep = i_rep,
             mse = median(importances_iRafNet["mse", ], na.rm = TRUE),
             aupr = MESS::auc(recall, precision, type = 'spline'))

    pr_curve_wRF <- evaluate_fully_connected(importances_wRF[tfs, ],
                                             pwm_occurrence = pwm_occurrence,
                                             input_genes = genes, input_tfs = tfs,
                                             validation = c("DAPSeq"),
                                             nCores = nCores) %>%
      mutate(alpha = alpha, method = "weightedRF", rep = i_rep,
             mse = median(importances_wRF["mse", ], na.rm = TRUE),
             aupr = MESS::auc(recall, precision, type = 'spline'))

    runs[[length(runs) + 1]] <- rbind.data.frame(pr_curve_iRafNet, pr_curve_wRF)
  }
}
results <- do.call(rbind.data.frame, runs)

save(results, file = "results/comparisons_to_existing_methods/irafnet_weightedRF_comparison_reps.rdata")


```

Showing the results:

```{r, fig.width=20}

# Restores `results` (iRafNet vs weightedRF runs) saved by the previous chunk.
load("results/comparisons_to_existing_methods/irafnet_weightedRF_comparison_reps.rdata")

# Both panels are facetted on the same outer "case" label.
rf_results <- results %>%
  mutate(case = "Non linear case")

# Top panel: precision/recall points and lines per method, annotated with the
# AUPR averaged over each (method, alpha) group. The label x position depends
# on the method so that the two labels do not overlap.
pr_panel_rf <- rf_results %>%
  group_by(method, alpha) %>%
  mutate(aupr = mean(aupr, na.rm = TRUE),
         aupr_pos = ifelse(method == "weightedRF", 0.75, 0.25)) %>%
  ungroup() %>%
  ggplot(aes(y = precision, x = recall, color = method)) +
  geom_point(size = 0.5) +
  geom_line() +
  ggh4x::facet_nested_wrap(vars(case, paste("alpha =", alpha)), nest_line = TRUE) +
  geom_text(aes(y = 0.5, x = aupr_pos, label = paste("AUPR =", round(aupr, 2))),
            show.legend = FALSE)

# Bottom panel: one segment per method up to the mean (over runs) of the
# per-run median MSE, with the individual runs shown as black points.
# geom_text() has no `fill` parameter (it was ignored), so it is not passed.
mse_panel_rf <- rf_results %>%
  group_by(alpha, method) %>%
  mutate(mean_mse = mean(mse, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(y = mean_mse, x = method, fill = method)) +
  geom_text(aes(label = round(mean_mse, 3), y = 0.12, color = method),
            nudge_x = 0.275, show.legend = FALSE) +
  geom_segment(aes(xend = method, y = 0, yend = mean_mse, color = method), size = 2.5) +
  geom_point(aes(y = mse), size = 0.85, color = "black") +
  ylab("Median MSE") +
  ggh4x::facet_nested_wrap(vars(case, paste("alpha =", alpha)), nest_line = TRUE)

# `/` stacks the panels; `+ xlab("")` applies to the last (bottom) panel.
plot_rf <- pr_panel_rf / mse_panel_rf + xlab("")

# `&` applies the scales and theme to every panel of the composition.
plot_rf <- plot_rf &
  scale_color_manual(name = "Method", values = c("#668877", "#70AD47")) &
  scale_fill_manual(name = "Method", values = c("#668877", "#70AD47")) &
  theme_bw() &
  theme(strip.background = element_blank())
plot_rf

```

Adding DIOgene to these results:

```{r}

# The loaded file provides `lmses`, read just below.
load("results/rdata/gene_specific/gene_specific_mse_true_sd.rdata")

# Median over rows of the row-wise mean of the "RF_trueData" entries of `lmses`.
is_rf_true_data <- str_detect(names(lmses), "RF_trueData")
mse_diogene_rf <- median(rowMeans(lmses[is_rf_true_data]))

# Per (alpha, method) mean of the run-level MSE for the methods in `results`...
rf_mse_table <- results %>%
  mutate(case = "Non linear case") %>%
  group_by(alpha, method) %>%
  mutate(mean_mse = mean(mse, na.rm = TRUE)) %>%
  select(case, method, mean_mse, alpha, mse) %>%
  ungroup() %>%
  mutate(alpha = as.character(alpha))

# ...plus one extra row for DIOgene, whose alpha is labelled "Gene-specific".
mses_with_diogene_rf <- add_row(
  rf_mse_table,
  case = "Non linear case",
  method = "DIOgene",
  alpha = "Gene-specific",
  mean_mse = mse_diogene_rf,
  mse = mse_diogene_rf
)


```


# Linear case: closest implementation to mLASSO-StARS

Differences between weightedLASSO and mLASSO-StARS:

+ The GLM family is Poisson for weightedLASSO, and Gaussian (following log transform) for mLASSO-StARS 
+ Sparsity selection is done by cv.glmnet in weightedLASSO (lambda 1se), and by the lasso.stars R package in mLASSO-StARS
+ The importance metric is the MSE increase after permutations on OOB for weightedLASSO, and selection frequency + partial correlation for mLASSO-StARS.

```{r}
# Importance matrices of every mLASSO-StARS run, keyed by
# "mLASSO.StARS_<alpha>_<rep>". The list must exist before it is filled in the
# loop: assigning into an undefined object is an error in a fresh session.
importances <- list()

# Per-run precision/recall tables, bound once after the loops.
runs <- list()

# NOTE(review): only alphas[2:3] (0.5 and 1) are inferred here, whereas later
# chunks look up "mLASSO.StARS_0_<rep>" entries — confirm where those come from.
for (alpha in alphas[2:3]) {
  # `i_rep` (not `rep`) so that base::rep() is not masked by the loop variable
  for (i_rep in seq_len(N_rep)) {
    # computing models for mLASSO_stars and weightedLASSO
    importances_mLASSO_stars <- mLASSO_stars_inference(counts, genes, tfs, alpha = alpha,
                                                       N_boot = 5, N_stars_ss = 5,
                                                       stars.thresh = 0.05,
                                                       pwm_occurrence = pwm_occurrence,
                                                       nCores = nCores)

    importances[[paste0("mLASSO.StARS_", alpha, "_", i_rep)]] <- importances_mLASSO_stars

    importances_wLASSO <- weightedLASSO_inference(counts, genes, tfs, alpha = alpha,
                                                  N = 25, pwm_occurrence = pwm_occurrence,
                                                  nCores = nCores)

    # building sparse GRNs and evaluating them against DAP-Seq;
    # the "mse" row of the importance matrix is summarised by its median
    pr_curve_mLASSO_stars <- evaluate_fully_connected(importances_mLASSO_stars[tfs, ],
                                                      pwm_occurrence = pwm_occurrence,
                                                      input_genes = genes, input_tfs = tfs,
                                                      validation = c("DAPSeq"),
                                                      nCores = 10) %>%
      mutate(alpha = alpha, method = "mLASSO_stars", rep = i_rep,
             mse = median(importances_mLASSO_stars["mse", ], na.rm = TRUE),
             aupr = MESS::auc(recall, precision, type = 'spline'))

    pr_curve_wLASSO <- evaluate_fully_connected(importances_wLASSO[tfs, ],
                                                pwm_occurrence = pwm_occurrence,
                                                input_genes = genes, input_tfs = tfs,
                                                validation = c("DAPSeq"),
                                                nCores = 10) %>%
      mutate(alpha = alpha, method = "weightedLASSO", rep = i_rep,
             mse = median(importances_wLASSO["mse", ], na.rm = TRUE),
             aupr = MESS::auc(recall, precision, type = 'spline'))

    runs[[length(runs) + 1]] <- rbind.data.frame(pr_curve_wLASSO, pr_curve_mLASSO_stars)
  }
}
results <- do.call(rbind.data.frame, runs)

save(results, file = "results/mLASSO_stars_weightedLASSO_comparison_reps.rdata")


```
```{r, eval=FALSE}
# Persist the mLASSO-StARS importance matrices collected in `importances`
# (this chunk is not evaluated when knitting: eval=FALSE).
# NOTE(review): the file name says "10reps" — confirm it matches the number of
# replicates actually stored in `importances`.
save(importances, file = "results/rdata/mLASSO_stars_importances_10reps.rdata")
```

Adding weightedEN to the linear comparison

```{r, fig.height=15}
# Restores `results` (mLASSO-StARS vs weightedLASSO runs).
load(file = "results/mLASSO_stars_weightedLASSO_comparison_reps.rdata")

# get 10 weightedEN matrices for three alphas (the file provides `mats`)
load("results/rdata/weightedEN_importances_50rep.rdata")

# For each alpha, average the first 10 weightedEN importance matrices
# element-wise and evaluate the mean matrix against DAP-Seq. The evaluations
# are collected in a list and bound to `results` once. The local is named
# `en_eval` so that base::eval() is not masked.
en_evals <- list()
for (alpha in c(0, 0.5, 1)) {
  mats_10 <- mats[paste0("EN_", alpha, "_trueData_", 1:10)]
  mat_mean <- apply(simplify2array(mats_10), 1:2, mean)

  en_eval <- evaluate_fully_connected(mat_mean[tfs, ],
                                      pwm_occurrence = pwm_occurrence,
                                      input_genes = genes, input_tfs = tfs,
                                      validation = c("DAPSeq"),
                                      nCores = 10) %>%
    mutate(aupr = MESS::auc(recall, precision, type = 'linear'),
           mse = median(mat_mean["mse", ]), rep = 1,
           method = "weightedEN", alpha = alpha)

  en_evals[[length(en_evals) + 1]] <- en_eval
}
all_results <- do.call(rbind.data.frame, c(list(results), en_evals))

# AUPR with linear interpolation, recomputed per (method, alpha, rep) run
all_results <- all_results %>% # maybe filter on small densities?
  group_by(method, alpha, rep) %>%
  mutate(aupr_linear = MESS::auc(recall, precision, type = 'linear')) %>%
  ungroup()

mse_min <- 0
mse_max <- 0.3

# Outer facet label, shared by both panels: methods whose name contains
# "LASSO" or "EN" are linear, the others non linear.
linear_results <- all_results %>%
  mutate(regression = ifelse(str_detect(method, "LASSO") | str_detect(method, "EN"),
                             "Linear case", "Non linear case"))

# Top panel: precision/recall points, a line through the per-network means,
# and the AUPR averaged over each (method, alpha) group. Label positions depend
# on the method name so that the labels do not overlap.
pr_panel_linear <- linear_results %>%
  group_by(method, alpha) %>%
  mutate(aupr = mean(aupr_linear, na.rm = TRUE)) %>%
  group_by(method, alpha, network_name) %>%
  mutate(mean_precision = mean(precision),
         mean_recall = mean(recall),
         aupr_pos_x = ifelse(str_detect(method, "weighted"), 0.75, 0.25),
         aupr_pos_y = ifelse(str_detect(method, "EN"), 0.475, 0.5)) %>%
  ungroup() %>%
  ggplot(aes(y = precision, x = recall, color = interaction(method, regression))) +
  geom_point(size = 0.35, show.legend = TRUE) +
  geom_line(aes(y = mean_precision, x = mean_recall,
                group = interaction(method, regression)), show.legend = TRUE) +
  ggh4x::facet_nested_wrap(vars(regression, paste("alpha =", alpha)),
                           nest_line = TRUE, ncol = 4) +
  theme(legend.position = "top") +
  geom_text(aes(y = aupr_pos_y, x = aupr_pos_x, label = paste("AUPR =", round(aupr, 2))),
            show.legend = FALSE)

# Bottom panel: segment up to the mean MSE of each (alpha, method), capped at
# `mse_max` for drawing; the label still shows the uncapped value.
# geom_text() has no `fill` parameter (it was ignored), so it is not passed.
mse_panel_linear <- linear_results %>%
  group_by(alpha, method) %>%
  mutate(mean_mse = mean(mse, na.rm = TRUE),
         mean_mse_to_plot = ifelse(mean_mse > mse_max, mse_max, mean_mse)) %>%
  ungroup() %>%
  ggplot(aes(y = mean_mse, x = method, fill = method)) +
  ylab("Median MSE") + xlab("") + ylim(c(mse_min, mse_max)) +
  geom_segment(aes(xend = method, y = mse_min, yend = mean_mse_to_plot,
                   color = interaction(method, regression)), size = 2.5) +
  geom_point(aes(y = mse), size = 0.85, color = "black", show.legend = FALSE) +
  geom_text(aes(label = round(mean_mse, 3), y = 0.12,
                color = interaction(method, regression)),
            nudge_x = 0.275, show.legend = FALSE) +
  ggh4x::facet_nested_wrap(vars(regression, paste("alpha =", alpha)),
                           nest_line = TRUE, ncol = 4, scales = "free_x")

# `/` stacks the panels; `&` applies scales and theme to both of them.
comps_en <- (pr_panel_linear / mse_panel_linear) &
  scale_color_manual(name = "Method",
                     values = c("#668877", "darkgreen", "#70AD47"),
                     labels = c("mLASSO-StARS", "weightedEN", "weightedLASSO")) &
  scale_fill_manual(name = "Method",
                    values = c("#668877", "darkgreen", "#70AD47"),
                    labels = c("mLASSO-StARS", "weightedEN", "weightedLASSO")) &
  theme_bw() &
  theme(strip.background = element_blank(),
        legend.position = "right")
comps_en

```


Results:

```{r}

# Restores `results` (mLASSO-StARS vs weightedLASSO runs).
load(file = "results/mLASSO_stars_weightedLASSO_comparison_reps.rdata")

# Data for the precision/recall panel: AUPR averaged per (method, alpha) and a
# method-dependent x position for its label.
lasso_pr_data <- results %>%
  group_by(method, alpha) %>%
  mutate(aupr = mean(aupr, na.rm = TRUE),
         aupr_pos = ifelse(method == "weightedLASSO", 0.75, 0.25))

lasso_pr_panel <- ggplot(lasso_pr_data, aes(y = precision, x = recall, color = method)) +
  geom_point(size = 0.5) +
  geom_smooth(method = "gam", aes(fill = method), alpha = 0.2) +
  ggh4x::facet_nested_wrap(vars(paste("alpha =", alpha)), nest_line = TRUE) +
  geom_text(aes(y = 0.5, x = aupr_pos, label = paste("AUPR =", round(aupr, 2))),
            show.legend = FALSE)

# Data for the MSE panel: mean of the run-level MSE per (alpha, method).
lasso_mse_data <- results %>%
  group_by(alpha, method) %>%
  mutate(mean_mse = mean(mse, na.rm = TRUE))

lasso_mse_panel <- ggplot(lasso_mse_data, aes(y = mean_mse, x = method, fill = method)) +
  geom_segment(aes(xend = method, y = 0, yend = mean_mse, color = method), size = 2.5) +
  geom_point(aes(y = mse), size = 0.85, color = "black") +
  ggh4x::facet_nested_wrap(vars(paste("alpha =", alpha)), nest_line = TRUE)

# Stack the two panels and display the composition with default scales...
plot_lasso <- lasso_pr_panel / lasso_mse_panel
plot_lasso

# ...then apply the method colours and theme to every panel, and display again.
plot_lasso <- plot_lasso &
  scale_color_manual(values = c("#668877", "#70AD47")) &
  scale_fill_manual(values = c("#668877", "#70AD47")) &
  theme_bw() &
  theme(strip.background = element_blank())
plot_lasso

ggexport(plot_lasso,
         filename = "results/comparisons_to_existing_methods/linear_comparison.pdf",
         width = 10, height = 6)
```


What is the complexity/sparsity of the different linear models?

```{r, fig.width=15}
# Each of the first two files provides `mats`; it is copied under a
# method-specific name before the next load() overwrites it.
load("results/rdata/weightedEN_importances_50rep.rdata")
mats_en <- mats

load("results/rdata/weightedLASSO_importances_50rep.rdata")
mats_lasso <- mats

load("results/rdata/mLASSO_stars_importances_10reps.rdata")
mats_stars <- importances

ALPHAS <- c(0, 0.5, 1)

# Element-wise mean of a list of importance matrices, restricted to the TF rows.
# The importance matrices also carry an "mse" row (read elsewhere in this
# notebook as mat["mse", ]); it is not a regulator and must not be counted as a
# selected TF, hence the subsetting on `tfs`.
mean_tf_importances <- function(mat_list) {
  mat_mean <- apply(simplify2array(mat_list), 1:2, mean)
  mat_mean[tfs, , drop = FALSE]
}

# One row per gene (column of `selection`): number of selected TFs, plus the
# mean and standard deviation of that number over all genes.
count_selected_tfs <- function(selection, model, alpha) {
  nb_tf <- colSums(selection)
  data.frame(model = model, alpha = alpha,
             nb_tf = c(nb_tf),
             mean_nb_tf = mean(nb_tf),
             sd_nb_tf = sd(nb_tf))
}

# Per-alpha tables are collected in a list and bound once after the loop.
complexity <- list()
for (alpha in ALPHAS) {
  # weightedEN and weightedLASSO: mean over 10 replicates, selected when != 0
  mat_mean_en <- mean_tf_importances(mats_en[paste0("EN_", alpha, "_trueData_", 1:10)])
  mat_mean_lasso <- mean_tf_importances(mats_lasso[paste0("LASSO_", alpha, "_trueData_", 1:10)])

  # mLASSO-StARS: mean over 3 replicates, selected when > 0.3
  # NOTE(review): the inference chunk of this notebook only runs alpha = 0.5
  # and 1 for mLASSO-StARS — confirm the loaded file has alpha = 0 entries.
  mat_mean_stars <- mean_tf_importances(mats_stars[paste0("mLASSO.StARS_", alpha, "_", 1:3)])

  complexity[[length(complexity) + 1]] <- rbind.data.frame(
    count_selected_tfs(mat_mean_en != 0, "EN", alpha),
    count_selected_tfs(mat_mean_lasso != 0, "LASSO", alpha),
    count_selected_tfs(mat_mean_stars > 0.3, "mLASSO-StARS", alpha)
  )
}
results <- do.call(rbind.data.frame, complexity)

# Density of the number of selected regulators per gene, one facet per alpha.
# facet_nested_wrap() is namespaced: ggh4x is not attached by this notebook.
model_complexity_hist <- results %>%
  filter(alpha %in% c(0, 0.5, 1)) %>%
  mutate(model = str_replace(model, "^LASSO", "weightedLASSO"),
         model = str_replace(model, "^EN", "weightedEN"),
         alpha = paste0("alpha = ", alpha)) %>%
  ggplot(aes(x = nb_tf, col = model, fill = model)) +
  xlab("Number of regulators selected at least once") +
  ggh4x::facet_nested_wrap(vars(as.factor(alpha)), ncol = 12) +
  geom_density(alpha = 0.5, size = 1.25) +
  scale_fill_manual(name = "Method",
                    values = c("#668877", "darkgreen", "#70AD47"),
                    labels = c("mLASSO-StARS", "weightedEN", "weightedLASSO")) +
  scale_color_manual(name = "Method",
                     values = c("#668877", "darkgreen", "#70AD47"),
                     labels = c("mLASSO-StARS", "weightedEN", "weightedLASSO")) +
  theme_bw() +
  theme(strip.background = element_blank())
model_complexity_hist

```

MSE of restricted models:

```{r, fig.width=15}
# Restricted MSE: for each linear method and alpha, average the replicate
# importance matrices, build a network with `topN = 3` via
# weightedLASSO_network_gene_topN(), and compute MSEs with
# weightedLASSO_predictions() on those edges. The same is then done for the
# gene-specific (DIOgene) LASSO matrices, and the medians are plotted.

# The EN and LASSO files both provide `mats`; each is copied under a
# method-specific name before the next load() overwrites it.
load("results/rdata/mLASSO_stars_importances_10reps.rdata")
mats_stars <- importances

load("results/rdata/weightedEN_importances_50rep.rdata")
mats_en <- mats

load("results/rdata/weightedLASSO_importances_50rep.rdata")
mats_lasso <- mats

results <- NULL
# NOTE(review): this overwrites the global `alphas` defined in the data loading
# chunk (same values here).
alphas = c(0,0.5,1)

for(alpha in alphas){
  
  # weightedEN
  # element-wise mean of the first 10 replicate matrices
  mats_10_en <- mats_en[paste0("EN_",alpha,"_trueData_",1:10)]
  mat_mean_en <- apply(simplify2array(mats_10_en), 1:2, mean)
  
  grn_en <- weightedLASSO_network_gene_topN(mat_mean_en, topN = 3,
                                  pwm_occurrence = pwm_occurrence, 
                                  genes = genes, tfs = tfs)

  mses <- weightedLASSO_predictions(counts, genes, tfs, edges = grn_en, 
                                    pwm_occurrence = pwm_occurrence, EN_param = 0.1,
                                    N = 25, nCores = 40, alpha = alpha)
  
  to_add <- data.frame(model = "EN", alpha = alpha, 
                       mse = c(mses), 
                       strategy = "global")
  
  if(is.null(results))
    results <- to_add
  else
    results <- rbind.data.frame(results, to_add)
  
  # weightedLASSO
  mats_10_lasso <- mats_lasso[paste0("LASSO_",alpha,"_trueData_",1:10)]
  mat_mean_lasso <- apply(simplify2array(mats_10_lasso), 1:2, mean)
  
  grn_lasso <- weightedLASSO_network_gene_topN(mat_mean_lasso, topN = 3,
                                  pwm_occurrence = pwm_occurrence, 
                                  genes = genes, tfs = tfs)

  mses <- weightedLASSO_predictions(counts, genes, tfs, edges = grn_lasso, 
                                    pwm_occurrence = pwm_occurrence, EN_param = 1,
                                    N = 25, nCores = 40, alpha = alpha)
  
  to_add <- data.frame(model = "LASSO", alpha = alpha, 
                       mse = c(mses),
                       strategy = "global")
  
  results <- rbind.data.frame(results, to_add)
  
  # mLASSO STARS (3 replicates only)
  # NOTE(review): the inference chunk of this notebook only runs alpha = 0.5
  # and 1 for mLASSO-StARS — confirm the loaded file has alpha = 0 entries.
  mats_10_stars <- mats_stars[paste0("mLASSO.StARS_",alpha,"_",1:3)]
  mat_mean_stars <- apply(simplify2array(mats_10_stars), 1:2, mean)
  
  grn_stars <- weightedLASSO_network_gene_topN(mat_mean_stars, topN = 3,
                                  pwm_occurrence = pwm_occurrence, 
                                  genes = genes, tfs = tfs)

  mses <- weightedLASSO_predictions(counts, genes, tfs, edges = grn_stars, 
                                    pwm_occurrence = pwm_occurrence, EN_param = 1,
                                    N = 25, nCores = 40, alpha = alpha)
  
  to_add <- data.frame(model = "mLASSO.StARS", alpha = alpha, 
                       mse = c(mses),
                       strategy = "global")
  
  results <- rbind.data.frame(results, to_add)
}
# The results are written to disk and immediately read back from the same file.
save(results, file = "results/comparisons_to_existing_methods/restricted_mse_results_linear.rdata")

load("results/comparisons_to_existing_methods/restricted_mse_results_linear.rdata")

# restricted MSES at 3 top regulators for linear DIogene:
# This load() replaces `mats`; only entries whose name contains both "true"
# and "LASSO" are kept and averaged element-wise.
load("results/rdata/gene_specific/gene_specific_grns_student.rdata")
mats <- mats[str_detect(names(mats),"true")]
mats <- mats[str_detect(names(mats),"LASSO")]
mat_mean_lasso_diogene <- apply(simplify2array(mats), 1:2, mean)

diogene_grn_lasso <- weightedLASSO_network_gene_topN(mat_mean_lasso_diogene, topN = 3,
                                pwm_occurrence = pwm_occurrence, 
                                genes = genes, tfs = tfs)

# presumably provides `stud_lasso` (passed as `alpha` below) — TODO confirm
load("results/rdata/gene_specific/alpha_per_gene_weightedLASSO_student.rdata")

mses_lasso_restricted <- weightedLASSO_predictions(counts, genes, tfs, edges = diogene_grn_lasso, 
                                  pwm_occurrence = pwm_occurrence, EN_param = 1,
                                  N = 25, nCores = 40, alpha = stud_lasso)

mse_diogene_student <- mses_lasso_restricted

# NOTE(review): this file is written by a later chunk of this notebook (the
# one that saves `data_for_restricted_mse_top_3`), so this load() needs a
# previous run to exist on disk. Confirm which objects it is expected to
# provide here, and whether it may overwrite `results`.
load("results/comparisons_to_existing_methods/restricted_mse_top3_results_linear.rdata")


# Global-alpha results plus the gene-specific (DIOgene) LASSO MSEs, with
# display names for the models and the MSE capped at 0.3 for plotting.
restricted_mses_with_diogene <- rbind.data.frame(results, data.frame(model = "LASSO", alpha = "Gene-specific", 
                       mse = c(mses_lasso_restricted),
                       strategy = "DIOgene")) %>%
   mutate(model = str_replace(model, "EN", "weightedEN"),
         model = str_replace(model, "^LASSO", "weightedLASSO"))%>%
  mutate(mse_to_plot = ifelse(mse>0.3, 0.3, mse))


# Bar plot of the median restricted MSE per (model, alpha); bars are capped at
# 0.3 while the text label shows the uncapped median.
restricted_mse <- results %>%
  mutate(model = str_replace(model, "EN", "weightedEN"),
         model = str_replace(model, "^LASSO", "weightedLASSO"))%>%
  group_by(model, alpha) %>%
  summarise(mse = median(mse, na.rm = T)) %>%
  mutate(mse_to_plot = ifelse(mse>0.3, 0.3, mse)) %>%
  ggplot(aes(x=model, y = mse_to_plot, fill = model))+
  geom_bar(stat = "identity", width = 0.1)+ ylab("Restricted median MSE")+ 
  geom_point(color = "black", aes(y=mse))+
   geom_text(aes(label = round(mse, 3), y=0.12, color = model), 
             nudge_x = 0.275, fill = "white", show.legend = F)+
  ggh4x::facet_nested_wrap(vars(paste("alpha =", alpha)), nest_line = T, ncol = 3)+
  ylim(c(0,0.3))+theme_bw()+scale_fill_manual(name = "Method",
                     values = c("#668877", "darkgreen", "#70AD47"), 
                     labels = c("mLASSO-StARS", "weightedEN", "weightedLASSO"))+
  scale_color_manual(name = "Method",
                     values = c("#668877", "darkgreen", "#70AD47"), 
                     labels = c("mLASSO-StARS", "weightedEN", "weightedLASSO"))+
  theme(strip.background = element_blank(),
        legend.position = "right");restricted_mse

```
Plot line to add to main figure:

```{r, fig.height=3, fig.width=10}

# Linear rows: restricted MSEs, with the gene-specific rows relabelled "DIOgene".
linear_rows <- restricted_mses_with_diogene %>%
  mutate(case = "Linear case") %>%
  mutate(mean_mse = mse,
         model = ifelse(alpha == "Gene-specific", "DIOgene", model))

# Non-linear rows: one row per distinct mean MSE of the random-forest methods.
non_linear_rows <- mses_with_diogene_rf %>%
  mutate(case = "Non-linear Case") %>%
  rename(model = method) %>%
  mutate(mse_to_plot = mse) %>%
  distinct(mean_mse, .keep_all = TRUE)

# The two tables do not have the same columns (`strategy` only exists on the
# linear side), which rbind.data.frame() rejects; bind_rows() matches columns
# by name and fills the missing ones with NA.
data_for_restricted_mse_top_3 <- as.data.frame(
  dplyr::bind_rows(linear_rows, non_linear_rows)
)

# NOTE(review): row 10 is addressed by position; it presumably is meant to be
# the linear DIOgene row — confirm against the actual row order of the table.
data_for_restricted_mse_top_3[10, "mse"] <- median(mse_diogene_student)
data_for_restricted_mse_top_3[10, "mean_mse"] <- median(mse_diogene_student)
data_for_restricted_mse_top_3[10, "mse_to_plot"] <- median(mse_diogene_student)

# The table is written to disk and immediately read back from the same file.
save(data_for_restricted_mse_top_3, file = "results/comparisons_to_existing_methods/restricted_mse_top3_results_linear.rdata")

load("results/comparisons_to_existing_methods/restricted_mse_top3_results_linear.rdata")

# Colours are matched to the methods by name rather than by position, so that
# each method keeps the colour it has in the other figures of this notebook
# (mLASSO-StARS "#668877", weightedEN "darkgreen") whatever the level order.
method_colors <- c(DIOgene = "#4670CD",
                   iRafNet = "#668877",
                   mLASSO.StARS = "#668877",
                   weightedEN = "darkgreen",
                   weightedLASSO = "#70AD47",
                   weightedRF = "#70AD47")

data_for_restricted_mse_top_3 %>%
  ggplot(aes(y = mse_to_plot, x = model, fill = model)) +
  geom_segment(aes(xend = model, y = 0, yend = mse, color = model), size = 2.5) +
  geom_label(aes(label = round(mean_mse, 2), y = 0.12, color = model),
             nudge_x = 0.2, fill = "white", show.legend = FALSE) +
  ylab("Restricted median MSE") +
  geom_point(color = "black", aes(y = mse), show.legend = FALSE) +
  ggh4x::facet_nested_wrap(vars(case, paste("alpha =", alpha)),
                           nest_line = TRUE, scales = "free_x", ncol = 8) +
  scale_color_manual(name = "Method", values = method_colors) +
  scale_fill_manual(name = "Method", values = method_colors) +
  theme_bw() +
  theme(strip.background = element_blank())
                       
  
```


Final comparison plot:

```{r}
# Assemble the figures built in the previous chunks into one composition,
# set the relative panel heights and tag the panels with lower-case letters.
figure_panels <- plot_rf + comps_en + model_complexity_hist + restricted_mse
panel_heights <- c(0.33, 0.33, 1, 0.33, 0.33)

final_comp <- figure_panels +
  plot_layout(heights = panel_heights) +
  plot_annotation(tag_levels = "a")

# Export the composition as a 12 x 16 PDF.
ggexport(
  final_comp,
  filename = "results/comparisons_to_existing_methods/final_comparison.pdf",
  width = 12,
  height = 16
)
```