Skip to content

Commit

Permalink
Update gapfill algorithm
Browse files Browse the repository at this point in the history
- Code was slightly modified to make sure that the initial gap-fill pFBA
solution is actually a feasible solution.
- In case infeasible solutions are returned, the LP is solved again with
lowered bound violation tolerance (refers to #232).

Some further changes:
- During gapfilling, the reported numbers of added reactions now does
not count added exchange reactions
- more conistent checking of a returned FBA solution is actually
feasible
  • Loading branch information
Waschina committed Aug 30, 2024
1 parent 354989c commit 568dfa8
Show file tree
Hide file tree
Showing 9 changed files with 95 additions and 65 deletions.
2 changes: 1 addition & 1 deletion gapseq_env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ dependencies:
- r-stringi
- r-getopt
- r-r.utils
- r-cobrar
- r-cobrar >=0.1.1
- r-biocmanager
- bioconductor-biostrings
- r-jsonlite
Expand Down
5 changes: 2 additions & 3 deletions src/adapt.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,9 @@ suppressMessages(library(tools))

# select solver
if( "cobrarCPLEX" %in% rownames(installed.packages()) ){
COBRAR_SETTINGS("SOLVER","cplex"); ok <- 1
COBRAR_SETTINGS("SOLVER","cplex"); stat <- c(1,2)
}else{
warning("glpk is used as LP solver but cplex is recommended because it is much faster. If you have IBM's ILOG cplex installed, you can add cplex-support to gapseq by installing the R-package 'cobarCPLEX' (https://github.com/Waschina/cobrarCPLEX).")
COBRAR_SETTINGS("SOLVER","glpk"); ok <- 0
COBRAR_SETTINGS("SOLVER","glpk"); stat <- c(2,5)
}

# Setting defaults if required
Expand Down
83 changes: 58 additions & 25 deletions src/gapfill4.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
gapfill4 <- function(mod.orig, mod.full, rxn.weights, min.gr = 0.01, bcore = 50,
dummy.weight = 100, script.dir, core.only = FALSE, verbose=verbose, gs.origin = NA, rXg.tab, env = "") {

presRxnWeight <- 0.00001 # Reaction weight for pFBA for reactions already present in model or exchange/demand reactions

# backup model
mod.orig.bak <- mod.orig

Expand Down Expand Up @@ -63,14 +65,14 @@ gapfill4 <- function(mod.orig, mod.full, rxn.weights, min.gr = 0.01, bcore = 50,
rxn.weights = rxn.weights,
growth.rate = 0))
}
if(sol@ok != ok){
warning("FBA on original model did not end successfully! Zero solution is feasibile! But gapfilling will start anyway.")
if(sol@stat %notin% stat){
warning("FBA on original model did not end successfully solution is infeasibile. Gapfilling will start anyway.")
}

sol <- fba(mod)
gr.dummy <- sol@obj
if(sol@ok!=ok){
warning(paste0("Full model is already not able to yield a feasible solution There's no way to successful gap-filling."))
if(sol@stat %notin% stat){
warning(paste0("Full model is already not able to yield a feasible solution. There's no way to successful gap-filling."))
return(list(model = mod.orig.bak,
rxns.added = c(),
rxn.weights = rxn.weights,
Expand All @@ -79,33 +81,65 @@ gapfill4 <- function(mod.orig, mod.full, rxn.weights, min.gr = 0.01, bcore = 50,

c.coef.dt <- data.table(id = gsub("_.0$","",mod@react_id))
c.coef.dt <- merge(c.coef.dt, mseed[,.(id, weight, core.rxn)], by.x="id", by.y="id", all.x = T, sort = F)
c.coef.dt[is.na(weight), weight := 0.00001]
c.coef.dt[id %in% pres.rxns, weight := 0.00001]
c.coef.dt[is.na(weight), weight := presRxnWeight]
c.coef.dt[id %in% pres.rxns, weight := presRxnWeight]
c.coef.dt[, candRxn := id %notin% pres.rxns & !grepl("^EX|^DM|^bio1$",id)] # potential new data base reactions for the model

if(gs.origin==1) {
sol.tmp <- 0
max.iter <- 100
max.iter <- 15
n.iter <- 1
pFBAcoeff <- 1e-3
tol_bnd <- COBRAR_SETTINGS("TOLERANCE"); min_tol_bnd <- 1e-9
gf.success <- FALSE
mod@lowbnd[which(mod@obj_coef == 1)] <- min.gr # enforce minimum required growth rate
while(sol.tmp <= 0 & n.iter <= max.iter) {
while(!gf.success) {
# max iterations reached
if(n.iter > max.iter) {
stop("Initial gap-filling using all reactions was not successful although all available reactions were considered. Please check your gap-filling medium or consider reducing the minimum required growth rate.")
}

# otherwise, let's try
COBRAR_SETTINGS("TOLERANCE",tol_bnd)
sol.fba <- pfbaHeuristic(mod, costcoeffw = c.coef.dt$weight,
pFBAcoeff = pFBAcoeff)

n.iter <- n.iter + 1
if(sol.fba@ok==ok)
sol.tmp <- sol.fba@obj
if(sol.fba@obj <= 0.001)
sol.tmp <- 0
if(sol.tmp <= 0)
flxbm <- sol.fba@fluxes[which(mod@obj_coef == 1)]

if(sol.fba@stat %in% stat && flxbm > tol_bnd) {
# pFBA says its feasible, but let's check if solution is actually valid
mod.reduced <- rmReact(mod, react = mod@react_id[which(sol.fba@fluxes == 0)])
sol.reduced <- fba(mod.reduced)
flxbm.reduced <- sol.reduced@fluxes[which(mod.reduced@obj_coef == 1)]
if(sol.reduced@stat %notin% stat && tol_bnd > min_tol_bnd) {
tol_bnd <- max(tol_bnd / 2, min_tol_bnd)
cat("No feasible gap-fill solution found due to numeric issues. Adjusting bound tolerance to",tol_bnd,"\n")
next
} else if(sol.reduced@stat %notin% stat && tol_bnd <= min_tol_bnd) {
pFBAcoeff <- pFBAcoeff / 2
cat("No feasible gap-fill solution found due to numeric issues. Relaxing pFBA coefficient 'c' to",pFBAcoeff,"\n")
next
} else {
gf.success <- TRUE
next
}
} else if (sol.fba@stat %in% stat && flxbm <= tol_bnd) {
# pFBA says its feasible, but growth rate is zero. Let's try with relaxed pFBA
pFBAcoeff <- pFBAcoeff / 2
cat("pFBA feasible but with growth rate of 0. Relaxing pFBA coefficient 'c' to",pFBAcoeff,"\n")
next
}

# pFBA says its infeasible. Let's try with relaxed pFBA.
pFBAcoeff <- pFBAcoeff / 2
cat("No feasible gap-fill solution found. Relaxing pFBA coefficient 'c' to",pFBAcoeff,"\n")
}
} else {
sol.fba <- pfbaHeuristic(mod, costcoeffw = c.coef.dt$weight,
pFBAcoeff = 1e-3)
}

if(sol.fba@ok!=ok){
if(sol.fba@stat %notin% stat){
warning("pFBA did not end successfully.")
return(list(model = mod.orig.bak,
rxns.added = c(),
Expand All @@ -114,18 +148,17 @@ gapfill4 <- function(mod.orig, mod.full, rxn.weights, min.gr = 0.01, bcore = 50,
}

# Retrieve list of utilized dummy reactions and add them to the original/draft model
# TODO: Make this differently: How to find out which rxns are candidate reactions
ko.dt <- data.table(dummy.rxn = c.coef.dt[weight>0.00001,id],
d.rxn.ind = which(c.coef.dt$weight>0.00001),
dummy.weight = c.coef.dt[weight>0.00001,weight],
flux = sol.fba@fluxes[which(c.coef.dt$weight>0.00001)])
ko.dt <- data.table(dummy.rxn = c.coef.dt[candRxn == TRUE,id],
d.rxn.ind = which(c.coef.dt$candRxn == TRUE),
dummy.weight = c.coef.dt[candRxn == TRUE,weight],
flux = sol.fba@fluxes[which(c.coef.dt$candRxn == TRUE)])


ko.dt <- ko.dt[abs(flux) > 0]
ko.dt[, core := gsub("_.0","",dummy.rxn) %in% mseed[core.rxn==T,id]]
cat("Utilized candidate reactions: ",nrow(ko.dt))

if( nrow(ko.dt) == 0){ # no dummy reactions is needed
if(nrow(ko.dt) == 0){ # no dummy reactions are needed
warning("No dummy reactions utilized in full model. Nothing to add.")
return(list(model = mod.orig.bak,
rxns.added = c(),
Expand Down Expand Up @@ -195,8 +228,8 @@ gapfill4 <- function(mod.orig, mod.full, rxn.weights, min.gr = 0.01, bcore = 50,
mod.orig <- add_missing_exchanges(mod.orig)

sol <- fba(mod.orig)
if(sol@ok!=ok | sol@obj < 1e-7){
warning(paste0("Final model ", mod.orig@mod_name, " cannot produce enough target even when all candidate reactions are added! obj=", sol@obj, " ok=",sol@ok))
if(sol@stat %notin% stat | sol@obj < 1e-7){
warning(paste0("Final model ", mod.orig@mod_name, " cannot produce enough target even when all candidate reactions are added! obj=", sol@obj, " stat=",sol@stat))
return(list(model = mod.orig.bak,
rxns.added = c(),
rxn.weights = rxn.weights,
Expand Down Expand Up @@ -246,7 +279,7 @@ gapfill4 <- function(mod.orig, mod.full, rxn.weights, min.gr = 0.01, bcore = 50,

rxns.added <- gsub("_.0","",ko.dt[keep==T | core==T,dummy.rxn])
sol <- fba(mod.orig)
if(sol@ok!=ok){
if(sol@stat %notin% stat){
stop("Final model cannot grow. Something is terribly wrong!")
return(list(model = mod.orig.bak,
rxns.added = c(),
Expand Down
5 changes: 3 additions & 2 deletions src/generate_GSdraft.R
Original file line number Diff line number Diff line change
Expand Up @@ -232,9 +232,10 @@ build_draft_model_from_blast_results <- function(blast.res, transporter.res, bio
}
}
if(input_mode == "nucl") {
ncont <- length(unique(dt_genes[rm == F, stitle]))
cat(length(unique(dt_genes[rm == F & seed %in% mseed$id, paste(gene, sep="$")])),
"unique genes on",length(unique(dt_genes[rm == F, stitle])),
"genetic element(s)\n")
"unique genes on",ncont,
ifelse(ncont==1,"contig","contigs"),"\n")
}
if(input_mode == "prot") {
cat(length(unique(dt_genes[rm == F & seed %in% mseed$id, paste(gene, sep="$")])),
Expand Down
23 changes: 11 additions & 12 deletions src/gf.adapt.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,9 @@ if (!is.na(Sys.getenv("RSTUDIO", unset = NA))) {

# select solver
if( "cobrarCPLEX" %in% rownames(installed.packages()) ){
COBRAR_SETTINGS("SOLVER","cplex"); ok <- 1
COBRAR_SETTINGS("SOLVER","cplex"); stat <- c(1,2)
}else{
warning("glpk is used as LP solver but cplex is recommended because it is much faster. If you have IBM's ILOG cplex installed, you can add cplex-support to gapseq by installing the R-package 'cobarCPLEX' (https://github.com/Waschina/cobrarCPLEX).")
COBRAR_SETTINGS("SOLVER","glpk"); ok <- 0
COBRAR_SETTINGS("SOLVER","glpk"); stat <- c(2,5)
}

#setwd(script.dir)
Expand Down Expand Up @@ -86,7 +85,7 @@ check_lethal <- function(model, rxn_list, med.file=NA, med=NA){
mod.tmp <- rmReact(mod.tmp, react=model@react_id[na.omit(rxn.idx)])

sol <- fba(mod.tmp)
lethal <- !(sol@stat == ok & sol@obj >= 1e-7)
lethal <- !(sol@stat %in% stat & sol@obj >= 1e-7)
lethal.dt <- rbind(lethal.dt, data.table(nr=i, rxn=paste0(model@react_id[na.omit(rxn.idx)],collapse=","), lethal=lethal))
} else lethal.dt <- rbind(lethal.dt, data.table(nr=i, rxn="", lethal=NA))
}else lethal.dt <- rbind(lethal.dt, data.table(nr=i, rxn="", lethal=NA))
Expand Down Expand Up @@ -154,9 +153,9 @@ add_growth <- function(model.orig, add.met.id=NA, weights=NA, genes=NA, verbose=
mod.adapt <- esp.mode(mod.adapt, media)

sol <- fba(mod.adapt)
#cat("Check growth: ", sol@stat==ok, "\t", sol@obj, "\n")
#cat("Check growth: ", sol@stat %in% stat, "\t", sol@obj, "\n")

if(sol@stat == ok & sol@obj >= 1e-7){
if(sol@stat %in% stat & sol@obj >= 1e-7){
warning(paste("Model is already growing with", add.met.id, add.met.name))
if( verbose ){
cat("solution", sol$obj, "\n")
Expand Down Expand Up @@ -190,7 +189,7 @@ add_growth <- function(model.orig, add.met.id=NA, weights=NA, genes=NA, verbose=
#check_growth(mod.adapt.lst$model, medium = c(media$compounds), printExchanges = T, useNames = T, mtf = T)

sol <- fba(mod.adapt)
#cat(sol@stat==ok, "\t", sol@obj, "\n")
#cat(sol@stat %in% stat, "\t", sol@obj, "\n")
cat("\n")

check <- check_lethal(mod.adapt, rxn_list = setdiff(mod.adapt@react_id, model.orig@react_id), med = media)
Expand Down Expand Up @@ -241,8 +240,8 @@ rm_growth <- function(model.orig, del.met.id, use.media=NA, rxn.blast.file, verb

model <- esp.mode(model.orig, media)
sol <- fba(model)
#cat(sol@stat==ok, "\t", sol@obj, "\n")
if( !(sol@stat==ok & sol@obj >= 1e-7 ) ){
#cat(sol@stat %in% stat, "\t", sol@obj, "\n")
if( !(sol@stat %in% stat & sol@obj >= 1e-7 ) ){
warning(paste("Model already cannot grow with:", del.met.id, del.met.name))
return(invisible(model.orig))
}else{
Expand Down Expand Up @@ -325,9 +324,9 @@ check_theo_growth <- function(check.met.id, fullmod){
mod.adapt <- constrain.model(mod.adapt, media=media)

sol <- pfbaHeuristic(mod.adapt)
cat("Check growth: ", sol@stat==ok, "\t", sol@obj, "\n")
cat("Check growth: ", sol@stat %in% stat, "\t", sol@obj, "\n")

if(sol@stat == ok & sol@obj >= 1e-7){
if(sol@stat %in% stat & sol@obj >= 1e-7){
print("Growth possible")
}else{
print("No growth possible")
Expand Down Expand Up @@ -363,7 +362,7 @@ increase_growth <- function(model.orig, min.obj.val, weights=NA, genes=NA, verbo

sol <- fba(model.orig)
cat("Old growth rate:", round(sol@obj,6), "\n")
if(sol@stat == ok & sol@obj >= min.obj.val){
if(sol@stat %in% stat & sol@obj >= min.obj.val){
warning(paste("Model already has growth rate >=",min.obj.val))
return(invisible(model.orig))
}else{
Expand Down
32 changes: 19 additions & 13 deletions src/gf.suite.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,11 @@ suppressMessages(library(tools))

# select solver
if( "cobrarCPLEX" %in% rownames(installed.packages()) ){
COBRAR_SETTINGS("SOLVER","cplex"); ok <- 1
COBRAR_SETTINGS("SOLVER","cplex"); stat <- c(1,2)
}else{
COBRAR_SETTINGS("SOLVER","glpk"); ok <- 0
COBRAR_SETTINGS("SOLVER","glpk"); stat <- c(2,5)
}
# COBRAR_SETTINGS("SOLVER","glpk"); stat <- c(2,5)
cat("LP solver:",COBRAR_SETTINGS("SOLVER"),"\n")

# Setting defaults if required
Expand Down Expand Up @@ -99,6 +100,12 @@ min.obj.val <- opt$min.obj.val
dummy.weight <- 100
sbml.export <- FALSE

# check if minimum required growth rate is valid
if(min.obj.val < 0.001) {
warning("Minimum required growth rate ('-k') should not be smaller than 0.001. Resetting value to 0.001.")
min.obj.val <- 0.001
}

# create output directory if not already there
dir.create(output.dir, recursive = TRUE, showWarnings = FALSE)
if (!dir.exists(output.dir) || file.access(output.dir, mode = 2) == -1)
Expand Down Expand Up @@ -331,7 +338,7 @@ if(nrow(mseed.t)>0) { # Skip steps 2,2b,3, and 4 if core-reaction list does not

sol <- fba(mod.fill2)

if(sol@ok == ok & sol@obj >= 1e-6){
if(sol@stat %in% stat & sol@obj >= 1e-6){
mod.fill2@obj_coef <- rep(0,react_num(mod.fill2))
}else{
if( verbose ) cat("\nTry to gapfill", bm.met.name[i],"\n")
Expand Down Expand Up @@ -368,11 +375,10 @@ if(nrow(mseed.t)>0) { # Skip steps 2,2b,3, and 4 if core-reaction list does not

cat("\rGapfill summary:\n")
cat("Filled components: ",mod.fill2.counter, "(",paste(mod.fill2.names, collapse = ","),")\n")
cat("Added reactions: ",length(mod.fill2@react_id)-length(mod.fill1@react_id),"\n")
cat("Added reactions: ",length(mod.fill2@react_id[!grepl("^EX_|^DM",mod.fill2@react_id) & mod.fill2@react_id %notin% mod.fill1@react_id]),"\n")
cat("Final growth rate: ",fba(mod.fill2)@obj,"\n")



media2 <- fread(paste0(script.dir,"/../dat/media/MM_glu.csv")) # load minimal medium and add available carbon sources
if( nrow(fread(media.file, header=F)[V1=="cpd00007" & V3!=0]) ){
cat("\n\n2b. Anaerobic biomass gapfilling using core reactions only\n")
Expand Down Expand Up @@ -424,7 +430,7 @@ if(nrow(mseed.t)>0) { # Skip steps 2,2b,3, and 4 if core-reaction list does not

sol <- fba(mod.fill2)

if(sol@ok == ok & sol@obj >= 1e-6){
if(sol@stat %in% stat & sol@obj >= 1e-6){
mod.fill2@obj_coef <- rep(0,react_num(mod.fill2))
}else{
if( verbose ) cat("\nTry to gapfill", bm.met.name[i],"\n")
Expand Down Expand Up @@ -461,7 +467,7 @@ if(nrow(mseed.t)>0) { # Skip steps 2,2b,3, and 4 if core-reaction list does not

cat("\rGapfill summary:\n")
cat("Filled components: ",mod.fill2.counter, "(",paste(mod.fill2.names, collapse = ","),")\n")
cat("Added reactions: ",length(mod.fill2@react_id)-length(mod.orig2@react_id),"\n")
cat("Added reactions: ",length(mod.fill2@react_id[!grepl("^EX_|^DM",mod.fill2@react_id) & mod.fill2@react_id %notin% mod.fill1@react_id]),"\n")
cat("Final growth rate: ",fba(mod.fill2)@obj,"\n")


Expand All @@ -474,7 +480,7 @@ if(nrow(mseed.t)>0) { # Skip steps 2,2b,3, and 4 if core-reaction list does not
exchanges.new.met <- str_replace(str_remove(carbon.source$seed[idx], "EX_"),"_e0","\\[e0\\]")
exchanges.new.name <- mod@met_name[match(exchanges.new.met,mod@met_id)]
exchanges.new.ids <- carbon.source$seed[idx]
exchanges.new.used <- rep(FALSE, length(exchanges.new.ids)) # delete unused addionally added exchange reactions later
exchanges.new.used <- rep(FALSE, length(exchanges.new.ids)) # delete unused additionally added exchange reactions later
mod.out <- add_exchanges(mod.out, exchanges.new.met, metname=exchanges.new.name)

if ( !quick.gf ){
Expand Down Expand Up @@ -539,7 +545,7 @@ if(nrow(mseed.t)>0) { # Skip steps 2,2b,3, and 4 if core-reaction list does not

sol <- fba(mod.fill3)

if(sol@ok == ok & sol@obj >= 1e-7){
if(sol@stat %in% stat & sol@obj >= 1e-7){
#mod.fill3@obj_coef <- rep(0,react_nummod.fill3))
src.status <- TRUE
}else{
Expand Down Expand Up @@ -577,7 +583,7 @@ if(nrow(mseed.t)>0) { # Skip steps 2,2b,3, and 4 if core-reaction list does not
mod.out <- mod.fill3
cat("\rGapfill summary:\n")
cat("Filled components: ",mod.fill3.counter, "(",paste(mod.fill3.names, collapse = ","),")\n")
cat("Added reactions: ",length(mod.fill3@react_id)-length(mod.fill2@react_id),"\n")
cat("Added reactions: ",length(mod.fill3@react_id[!grepl("^EX_|^DM",mod.fill3@react_id) & mod.fill3@react_id %notin% mod.fill2@react_id]),"\n")
cat("Final growth rate: ",fba(mod.fill3)@obj,"\n")
}

Expand Down Expand Up @@ -626,7 +632,7 @@ if(nrow(mseed.t)>0) { # Skip steps 2,2b,3, and 4 if core-reaction list does not

sol <- fba(mod.fill4)

if(sol@ok == ok & sol@obj >= 1e-7){
if(sol@stat %in% stat & sol@obj >= 1e-7){
#mod.fill4@obj_coef <- rep(0,react_num(mod.fill4))
src.status <- TRUE
}else{
Expand Down Expand Up @@ -673,7 +679,7 @@ if(nrow(mseed.t)>0) { # Skip steps 2,2b,3, and 4 if core-reaction list does not

cat("\rGapfill summary:\n")
cat("Filled components: ",mod.fill4.counter, "(",paste(mod.fill4.names, collapse = ","),")\n")
cat("Added reactions: ",length(mod.fill4@react_id)-length(mod.fill3@react_id),"\n")
cat("Added reactions: ",length(mod.fill4@react_id[!grepl("^EX_|^DM",mod.fill4@react_id) & mod.fill4@react_id %notin% mod.fill3@react_id]),"\n")
cat("Final growth rate: ",mod.fill4.sol@fluxes[which(mod.fill4@obj_coef==1)],"\n\n")

cat("Uptake at limit:\n")
Expand All @@ -690,7 +696,7 @@ mod.out <- add_missing_exchanges(mod.out)
# delete unused additionally added exchange reactions later
exchanges.rm <- exchanges.new.ids[!exchanges.new.used]
if( length(exchanges.rm) > 0 )
mod.out <- rmReact(mod.out, react=exchanges.rm)
mod.out <- rmReact(mod.out, react=exchanges.rm)
mod.out <- rm_unused_exchanges(mod.out)


Expand Down
8 changes: 0 additions & 8 deletions src/pan-draft.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,6 @@ suppressMessages(library(methods))
# Little helpers
source(paste0(script.dir,"/pan-draft_functions.R"))

# select solver
if( "cobrarCPLEX" %in% rownames(installed.packages()) ){
COBRAR_SETTINGS("SOLVER","cplex"); ok <- 1
}else{
warning("glpk is used as LP solver but cplex is recommended because it is much faster. If you have IBM's ILOG cplex installed, you can add cplex-support to gapseq by installing the R-package 'cobarCPLEX' (https://github.com/Waschina/cobrarCPLEX).")
COBRAR_SETTINGS("SOLVER","glpk"); ok <- 0
}

# Setting defaults if required
if ( is.null(opt$output.dir) ) { opt$output.dir = "." }
if ( is.null(opt$min.rxn.freq.in.mods) ) { opt$min.rxn.freq.in.mods = 0.06 }
Expand Down
Loading

0 comments on commit 568dfa8

Please sign in to comment.