Skip to content

Commit

Permalink
add --haploid parameter to work with haploid genomes
Browse files Browse the repository at this point in the history
  • Loading branch information
mehrdadbakhtiari committed Jul 3, 2018
1 parent 38ab8d9 commit dcad3bf
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 10 deletions.
2 changes: 2 additions & 0 deletions advntr/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ def main():
help='set this flag to determine long expansion from PCR-free data')
genotype_algortihm_group.add_argument('-c', '--coverage', type=float, metavar='<float>',
help='average sequencing coverage in PCR-free sequencing')
genotype_algortihm_group.add_argument('--haploid', action='store_true', default=False,
help='set this flag if the organism is haploid')
genotype_algortihm_group.add_argument('-naive', '--naive', action='store_true', default=False,
help='use naive approach for PacBio reads')

Expand Down
2 changes: 1 addition & 1 deletion advntr/advntr_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def genotype(args, genotype_parser):
target_vntrs = [int(vid) for vid in args.vntr_id.split(',')]
else:
target_vntrs = illumina_targets
genome_analyzier = GenomeAnalyzer(reference_vntrs, target_vntrs, working_directory)
genome_analyzier = GenomeAnalyzer(reference_vntrs, target_vntrs, working_directory, is_haploid=args.haploid)
if args.pacbio:
if input_is_alignment_file:
genome_analyzier.find_repeat_counts_from_pacbio_alignment_file(input_file)
Expand Down
13 changes: 8 additions & 5 deletions advntr/genome_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,24 @@


class GenomeAnalyzer:
def __init__(self, reference_vntrs, target_vntr_ids, working_directory='./'):
def __init__(self, reference_vntrs, target_vntr_ids, working_directory='./', is_haploid=False):
self.reference_vntrs = reference_vntrs
self.target_vntr_ids = target_vntr_ids
self.working_dir = working_directory
self.is_haploid = is_haploid

self.vntr_finder = {}
for ref_vntr in self.reference_vntrs:
if ref_vntr.id in target_vntr_ids:
self.vntr_finder[ref_vntr.id] = VNTRFinder(ref_vntr)
self.vntr_finder[ref_vntr.id] = VNTRFinder(ref_vntr, is_haploid=is_haploid)

@staticmethod
def print_genotype(vntr_id, copy_numbers):
def print_genotype(self, vntr_id, copy_numbers):
print(vntr_id)
if copy_numbers is not None:
print('/'.join([str(cn) for cn in sorted(copy_numbers)]))
if self.is_haploid:
print(copy_numbers[0])
else:
print('/'.join([str(cn) for cn in sorted(copy_numbers)]))
else:
print('None')

Expand Down
11 changes: 7 additions & 4 deletions advntr/vntr_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@ def is_mapped(self):
class VNTRFinder:
"""Find the VNTR structure of a reference VNTR in NGS data of the donor."""

def __init__(self, reference_vntr):
def __init__(self, reference_vntr, is_haploid=False):
self.reference_vntr = reference_vntr
self.is_haploid = is_haploid
self.min_repeat_bp_to_add_read = 2
if len(self.reference_vntr.pattern) < 30:
self.min_repeat_bp_to_add_read = 2
Expand Down Expand Up @@ -329,7 +330,7 @@ def get_conditional_likelihood(self, ck, ci, cj, ru_counts, r, r_e):
if ck != ci and ck != cj:
return 0.5 * (r_e ** abs(ck-ci) + r_e ** abs(ck-cj))

def find_genotype_based_on_observed_repeats(self, observed_copy_numbers, haploid=False):
def find_genotype_based_on_observed_repeats(self, observed_copy_numbers):
ru_counts = {}
for cn in observed_copy_numbers:
if cn not in ru_counts.keys():
Expand All @@ -353,7 +354,7 @@ def find_genotype_based_on_observed_repeats(self, observed_copy_numbers, haploid
for j in range(len(ru_counts)):
if j < i:
continue
if haploid and i != j:
if self.is_haploid and i != j:
continue
cj = ru_counts[j][0]
if (ci, cj) not in prs.keys():
Expand Down Expand Up @@ -533,7 +534,9 @@ def find_frameshift_from_alignment_file(self, alignment_file, unmapped_filtered_

@time_usage
def get_ru_count_with_coverage_method(self, pattern_occurrences, total_counted_vntr_bp, average_coverage):
return [int(pattern_occurrences / (average_coverage * 2.0))] * 2
haplotypes = 1 if self.is_haploid else 2
estimate = [int(pattern_occurrences / (float(average_coverage) * haplotypes))] * 2
return estimate
pattern_occurrences = total_counted_vntr_bp / float(len(self.reference_vntr.pattern))
read_mode = 'r' if alignment_file.endswith('sam') else 'rb'
samfile = pysam.AlignmentFile(alignment_file, read_mode)
Expand Down

0 comments on commit dcad3bf

Please sign in to comment.