Skip to content

Commit

Permalink
minor changes, remove commented code
Browse files Browse the repository at this point in the history
  • Loading branch information
pchaumeil committed May 10, 2022
1 parent 196147a commit 6daf1e0
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 56 deletions.
55 changes: 7 additions & 48 deletions gtdbtk/classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,44 +281,6 @@ def place_genomes(self,
raise GenomeMarkerSetUnknown

pplacer.tog(pplacer_json_out, tree_file)

# Symlink to the tree summary file
# if marker_set_id == 'bac120' and levelopt is None:
# symlink_f(PATH_BAC120_TREE_FILE.format(prefix=prefix),
# os.path.join(out_dir, os.path.basename(PATH_BAC120_TREE_FILE.format(prefix=prefix))))
# elif levelopt == 'high':
# symlink_f(PATH_BACKBONE_BAC120_TREE_FILE.format(prefix=prefix),
# os.path.join(out_dir, os.path.basename(PATH_BACKBONE_BAC120_TREE_FILE.format(prefix=prefix))))
# elif levelopt == 'low':
# symlink_f(PATH_CLASS_LEVEL_BAC120_TREE_FILE.format(prefix=prefix, iter=tree_iter),
# os.path.join(out_dir,
# os.path.basename(PATH_CLASS_LEVEL_BAC120_TREE_FILE.format(prefix=prefix, iter=tree_iter))))
# elif marker_set_id == 'ar53':
# symlink_f(PATH_AR53_TREE_FILE.format(prefix=prefix),
# os.path.join(out_dir, os.path.basename(PATH_AR53_TREE_FILE.format(prefix=prefix))))
# else:
# self.logger.error('There was an error determining the marker set.')
# raise GenomeMarkerSetUnknown

# Symlink to the tree summary file
# if marker_set_id == 'bac120':
# if levelopt is None:
# symlink_f(PATH_BAC120_TREE_FILE.format(prefix=prefix),
# os.path.join(out_dir, os.path.basename(PATH_BAC120_TREE_FILE.format(prefix=prefix))))
# elif levelopt == 'high':
# symlink_f(PATH_BACKBONE_BAC120_TREE_FILE.format(prefix=prefix),
# os.path.join(out_dir, os.path.basename(PATH_BACKBONE_BAC120_TREE_FILE.format(prefix=prefix))))
# elif levelopt == 'low':
# symlink_f(PATH_CLASS_LEVEL_BAC120_TREE_FILE.format(iter=tree_iter, prefix=prefix),
# os.path.join(out_dir, os.path.basename(
# PATH_CLASS_LEVEL_BAC120_TREE_FILE.format(iter=tree_iter, prefix=prefix))))
# elif marker_set_id == 'ar53':
# symlink_f(PATH_AR53_TREE_FILE.format(prefix=prefix),
# os.path.join(out_dir, os.path.basename(PATH_AR53_TREE_FILE.format(prefix=prefix))))
# else:
# self.logger.error('There was an error determining the marker set.')
# raise GenomeMarkerSetUnknown

return tree_file

def _parse_red_dict(self, red_dist_dict):
Expand Down Expand Up @@ -355,13 +317,11 @@ def run(self,
out_dir,
prefix,
scratch_dir=None,
recalculate_red=None,
debugopt=False,
fulltreeopt=False):
"""Classify genomes based on position in reference tree."""

_bac_gids, _ar_gids, bac_ar_diff = Markers().genome_domain(align_dir, prefix)
disappearing_genomes = []

for marker_set_id in ('ar53', 'bac120'):

Expand Down Expand Up @@ -399,9 +359,9 @@ def run(self,
raise GenomeMarkerSetUnknown('There was an error determining the marker set.')

if (not os.path.exists(user_msa_file)) or (os.path.getsize(user_msa_file) < 30):
# file will not exist if there are no User genomes from a
# given domain
# but if there is Unclassified genomes without domain,
# file will not exist if there are no User genomes from a given domain
#
# But if there are Unclassified genomes without a domain,
# they still have to be written in the bac120 summary file:
if marker_set_id == 'bac120':
# Add failed genomes from prodigal and genomes with no markers in the bac120 summary file
Expand Down Expand Up @@ -495,7 +455,7 @@ def run(self,
for disappearing_genome in disappearing_genomes:
disappearing_genomes_file.add_genome(disappearing_genome, tree_iter)

order_level_classification,classified_user_genomes = self._parse_tree(mrca_lowtree, genomes, msa_dict,
class_level_classification, classified_user_genomes = self._parse_tree(mrca_lowtree, genomes, msa_dict,
percent_multihit_dict, tln_table_summary_file.genomes,
bac_ar_diff, submsa_file_path, red_dict_file.data,
summary_file, pplacer_taxonomy_dict,
Expand All @@ -505,7 +465,7 @@ def run(self,

if debugopt:
with open(out_dir + '/' + prefix + '_class_level_classification.txt', 'a') as olf:
for l, b in order_level_classification.items():
for l, b in class_level_classification.items():
olf.write(l + '\t' + str(b) + '\n')
for l, b in classified_user_genomes.items():
olf.write(l + '\t' + str(b) + '\n')
Expand All @@ -514,12 +474,11 @@ def run(self,
self.add_filtered_genomes_to_summary(align_dir, summary_file, marker_set_id, prefix)

# Add failed genomes from prodigal and genomes with no markers in the bac120 summary file
# This is a executive direction: failed prodigal and genomes with no markers are nit bacterial or archaeal
# This is an executive direction: failed prodigal and genomes with
# no markers are not bacterial or archaeal
# but they need to be included in one of the summary files
self.add_failed_genomes_to_summary(align_dir, summary_file, prefix)



# Symlink to the summary file from the root
symlink_f(PATH_BAC120_SUMMARY_OUT.format(prefix=prefix),
os.path.join(out_dir, os.path.basename(PATH_BAC120_SUMMARY_OUT.format(prefix=prefix))))
Expand Down
7 changes: 3 additions & 4 deletions gtdbtk/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,9 @@ def __scratch_dir(group):
help='reduce pplacer memory usage by writing to disk (slower).')


def __recalculate_red(group):
group.add_argument('-r', '--recalculate_red', default=False, action='store_true',
help='recalculate RED values based on the reference tree and all added user genomes')
# def __recalculate_red(group):
# group.add_argument('-r', '--recalculate_red', default=False, action='store_true',
# help='recalculate RED values based on the reference tree and all added user genomes')


def __full_tree(group):
Expand Down Expand Up @@ -448,7 +448,6 @@ def get_main_parser():
__pplacer_cpus(grp)
__scratch_dir(grp)
__full_tree(grp)
# __recalculate_red(grp)
__min_af(grp)
__temp_dir(grp)
__debug(grp)
Expand Down
3 changes: 1 addition & 2 deletions gtdbtk/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,8 +461,7 @@ def classify(self, options):
prefix=options.prefix,
scratch_dir=options.scratch_dir,
debugopt=options.debug,
fulltreeopt=options.full_tree,
recalculate_red=False)
fulltreeopt=options.full_tree)

self.logger.info('Note that Tk classification mode is insufficient for publication of new taxonomic '
'designations. New designations should be based on one or more de novo trees, an '
Expand Down
2 changes: 0 additions & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,6 @@ def test_identify_align_classify(self):
classify_options.align_dir = align_options.out_dir
classify_options.out_dir = os.path.join(
self.generic_out_path, tmp_folder, 'classify')
classify_options.recalculate_red = False
self.optionparser.classify(classify_options)
summary_out = os.path.join(classify_options.out_dir,
PATH_AR53_SUMMARY_OUT.format(prefix=classify_options.prefix))
Expand Down Expand Up @@ -218,7 +217,6 @@ def test_classify_wf(self):
classify_wf_options.skip_gtdb_refs = False
classify_wf_options.cols_per_gene = None
classify_wf_options.max_consensus = None
classify_wf_options.recalculate_red = False
classify_wf_options.full_tree = True
self.optionparser.align(classify_wf_options)
self.optionparser.classify(classify_wf_options)
Expand Down

0 comments on commit 6daf1e0

Please sign in to comment.