Skip to content

Commit

Permalink
fixed bugs with file input and output and naming for running external…
Browse files Browse the repository at this point in the history
… method CBaSE
  • Loading branch information
ashuaibi7 committed Dec 13, 2024
1 parent dd002ed commit 7ac0732
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 20 deletions.
28 changes: 15 additions & 13 deletions external/cbase_params_v1.2.py
Original file line number Diff line number Diff line change
Expand Up @@ -1249,11 +1249,13 @@ def check_user_input(VCF, BUILD, CMODE, MODEL):
)
mutations_df = pd.DataFrame(mutations)
mutations_df.to_csv(
"{}/{}_kept_mutations.csv".format(TEMP_DIR, OUTNAME), index=False, sep="\t"
"{}/{}_kept_mutations.csv".format(TEMP_DIR, os.path.basename(OUTNAME)),
index=False,
sep="\t",
)
else: # Import mutation file (original CBaSE) with format: [gene, effect, alt, context]
mutations = import_maf_data(INFILE, CMODE)
sys.stderr.write("%i SNVs imported for file %s.\n" % (len(mutations), OUTNAME))
sys.stderr.write("%i SNVs imported for file %s.\n" % (len(mutations), INFILE))

lengths = [
[k, len(list(g))]
Expand All @@ -1279,7 +1281,7 @@ def check_user_input(VCF, BUILD, CMODE, MODEL):
mutations,
abundances,
effects_by_gene,
"%s/mutation_mat_%s.txt" % (TEMP_DIR, OUTNAME),
"%s/mutation_mat.txt" % TEMP_DIR,
)
sys.stderr.write("Finished data preparation.\n")

Expand Down Expand Up @@ -1328,7 +1330,7 @@ def check_user_input(VCF, BUILD, CMODE, MODEL):
cur_min_res = p_res[:]
if cur_min_res[2] == 1e20:
sys.stderr.write("Could not find a converging solution for model 1.\n")
fout = open("%s/param_estimates_%s_1.txt" % (TEMP_DIR, OUTNAME), "w")
fout = open("%s/param_estimates_1.txt" % TEMP_DIR, "w")
fout.write("%e, %e, %f, %i\n" % (cur_min_res[0], cur_min_res[1], cur_min_res[2], 1))
fout.close()

Expand All @@ -1351,7 +1353,7 @@ def check_user_input(VCF, BUILD, CMODE, MODEL):
cur_min_res = p_res[:]
if cur_min_res[2] == 1e20:
sys.stderr.write("Could not find a converging solution for model 2.\n")
fout = open("%s/param_estimates_%s_2.txt" % (TEMP_DIR, OUTNAME), "w")
fout = open("%s/param_estimates_2.txt" % TEMP_DIR, "w")
fout.write("%e, %e, %f, %i\n" % (cur_min_res[0], cur_min_res[1], cur_min_res[2], 2))
fout.close()

Expand Down Expand Up @@ -1384,7 +1386,7 @@ def check_user_input(VCF, BUILD, CMODE, MODEL):
cur_min_res = p_res[:]
if cur_min_res[4] == 1e20:
sys.stderr.write("Could not find a converging solution for model 3.\n")
fout = open("%s/param_estimates_%s_3.txt" % (TEMP_DIR, OUTNAME), "w")
fout = open("%s/param_estimates_3.txt" % TEMP_DIR, "w")
fout.write(
"%e, %e, %e, %e, %f, %i\n"
% (
Expand Down Expand Up @@ -1427,7 +1429,7 @@ def check_user_input(VCF, BUILD, CMODE, MODEL):
cur_min_res = p_res[:]
if cur_min_res[4] == 1e20:
sys.stderr.write("Could not find a converging solution for model 4.\n")
fout = open("%s/param_estimates_%s_4.txt" % (TEMP_DIR, OUTNAME), "w")
fout = open("%s/param_estimates_4.txt" % TEMP_DIR, "w")
fout.write(
"%e, %e, %e, %e, %f, %i\n"
% (
Expand Down Expand Up @@ -1472,7 +1474,7 @@ def check_user_input(VCF, BUILD, CMODE, MODEL):
cur_min_res = p_res[:]
if cur_min_res[5] == 1e20:
sys.stderr.write("Could not find a converging solution for model 5.\n")
fout = open("%s/param_estimates_%s_5.txt" % (TEMP_DIR, OUTNAME), "w")
fout = open("%s/param_estimates_5.txt" % TEMP_DIR, "w")
fout.write(
"%e, %e, %e, %e, %e, %f, %i\n"
% (
Expand Down Expand Up @@ -1518,7 +1520,7 @@ def check_user_input(VCF, BUILD, CMODE, MODEL):
cur_min_res = p_res[:]
if cur_min_res[5] == 1e20:
sys.stderr.write("Could not find a converging solution for model 6.\n")
fout = open("%s/param_estimates_%s_6.txt" % (TEMP_DIR, OUTNAME), "w")
fout = open("%s/param_estimates_6.txt" % TEMP_DIR, "w")
fout.write(
"%e, %e, %e, %e, %e, %f, %i\n"
% (
Expand All @@ -1535,7 +1537,7 @@ def check_user_input(VCF, BUILD, CMODE, MODEL):

# ************************************************************************************************************

p_files = glob.glob("%s/param_estimates_%s_*.txt" % (TEMP_DIR, OUTNAME))
p_files = glob.glob("%s/param_estimates_*.txt" % TEMP_DIR)

all_models = []
for p_file in p_files:
Expand All @@ -1554,7 +1556,7 @@ def check_user_input(VCF, BUILD, CMODE, MODEL):
if cur_min < 1e20:
# Export parameters and index of chosen model.
sys.stderr.write("Best model fit: model %i.\n" % int(all_models[cur_ind][-1]))
fout = open("%s/used_params_and_model_%s.txt" % (TEMP_DIR, OUTNAME), "w")
fout = open("%s/used_params_and_model.txt" % TEMP_DIR, "w")
fout.write(
"".join(
[
Expand All @@ -1568,7 +1570,7 @@ def check_user_input(VCF, BUILD, CMODE, MODEL):
)
fout.close()
# Import parameters and index of chosen model.
fin = open("%s/used_params_and_model_%s.txt" % (TEMP_DIR, OUTNAME))
fin = open("%s/used_params_and_model.txt" % TEMP_DIR)
lines = fin.readlines()
fin.close()
field = lines[0].strip().split(", ")
Expand Down Expand Up @@ -1602,7 +1604,7 @@ def check_user_input(VCF, BUILD, CMODE, MODEL):
]
)

fout = open("%s/output_data_preparation_%s.txt" % (TEMP_DIR, OUTNAME), "w")
fout = open("%s/output_data_preparation.txt" % TEMP_DIR, "w")
# Output format: [gene, lm, lk, ls, mobs, kobs, sobs, Lgene, lambda_s]
fout.write(
"gene\tl_m\tl_k\tl_s\tm_obs\tk_obs\ts_obs\tL_gene\tlambda_s\ts_max_per_sample\tN_samples=%i\n"
Expand Down
14 changes: 7 additions & 7 deletions external/cbase_qvals_v1.2.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,7 @@ def _pmf(self, x, s, eL, rat):

def export_pofxigivens_table(pvals_array, x_ind):
label = ["m", "k"][x_ind - 13]
fout = open("%s/pof%sigivens_%s.txt" % (TEMP_DIR, label, outname), "w")
fout = open("%s/pof%sigivens.txt" % (TEMP_DIR, label), "w")
for gene in pvals_array:
fout.write("%s_%si\t" % (gene[0], label))
for i in range(len(gene[x_ind])):
Expand All @@ -670,7 +670,7 @@ def export_pofxigivens_table(pvals_array, x_ind):


def export_pofxgivens_table(pvals_array, x_ind, outfile):
fout = open("%s/%s_%s.txt" % (TEMP_DIR, outfile, outname), "w")
fout = open("%s/%s.txt" % (TEMP_DIR, outfile), "w")
label = ["m", "k"][x_ind - 11]
for gene in pvals_array:
fout.write("%s_%s\t" % (gene[0], label))
Expand Down Expand Up @@ -970,7 +970,7 @@ def combine_qvalues(all_neg, all_pos):
#
# Collect parameter estimates from all fitted models in working directory.

p_files = glob.glob("%s/param_estimates_%s_*.txt" % (TEMP_DIR, outname))
p_files = glob.glob("%s/param_estimates_*.txt" % TEMP_DIR)

all_models = []
for p_file in p_files:
Expand All @@ -988,7 +988,7 @@ def combine_qvalues(all_neg, all_pos):
cur_ind = m
if cur_min < 1e20:
sys.stderr.write("Best model fit: model %i.\n" % int(all_models[cur_ind][-1]))
fout = open("%s/used_params_and_model_%s.txt" % (TEMP_DIR, outname), "w")
fout = open("%s/used_params_and_model.txt" % TEMP_DIR, "w")
fout.write(
"".join(
[
Expand All @@ -1009,7 +1009,7 @@ def combine_qvalues(all_neg, all_pos):
# Compute q-values for all genes, using best fitting model.

# Import parameters and index of chosen model.
fin = open("%s/used_params_and_model_%s.txt" % (TEMP_DIR, outname))
fin = open("%s/used_params_and_model.txt" % TEMP_DIR)
lines = fin.readlines()
fin.close()
field = lines[0].strip().split(", ")
Expand All @@ -1025,7 +1025,7 @@ def combine_qvalues(all_neg, all_pos):
sys.exit()

# Import output from data_preparation, containing l_x and (m,k,s)_obs.
fin = open("%s/output_data_preparation_%s.txt" % (TEMP_DIR, outname))
fin = open("%s/output_data_preparation.txt" % TEMP_DIR)
lines = fin.readlines()
fin.close()
mks_type = []
Expand Down Expand Up @@ -1092,7 +1092,7 @@ def combine_qvalues(all_neg, all_pos):
)

# Output q-values in file.
fout = open("%s/q_values_%s.txt" % (TEMP_DIR, outname), "w")
fout = open("%s/q_values.txt" % TEMP_DIR, "w")
fout.write("%s\t%s\n" % (mod_choice[mod_C], params))
fout.write(
"gene\tp_phi_m_neg\tq_phi_m_neg\tphi_m_neg\tp_phi_k_neg\tq_phi_k_neg\tphi_k_neg\tp_phi_neg\tq_phi_neg\tphi_neg\tp_phi_m_pos\tq_phi_m_pos\tphi_m_pos_or_p(m=0|s)\tp_phi_k_pos\tq_phi_k_pos\tphi_k_pos_or_p(k=0|s)\tp_phi_pos\tq_phi_pos\tphi_pos_or_p(m=0|s)*p(k=0|s)\tm_obs\tk_obs\ts_obs\tdm/ds\tdk/ds\td(m+k)/ds\n"
Expand Down

0 comments on commit 7ac0732

Please sign in to comment.