Skip to content

Commit

Permalink
Merge pull request #10 from genomize/master
Browse files Browse the repository at this point in the history
Pulling in changes from @genomize
  • Loading branch information
mdshw5 authored Dec 8, 2016
2 parents c723931 + ee077cf commit 6275130
Showing 1 changed file with 14 additions and 9 deletions.
23 changes: 14 additions & 9 deletions simplesam.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
except ImportError: #python2
from _multiprocessing import Connection

__version__ = '0.0.4'
__version__ = '0.0.5'

class DefaultOrderedDict(OrderedDict):
def __init__(self, default, items=[]):
Expand Down Expand Up @@ -61,8 +61,11 @@ def __eq__(self, other):

class Reader(object):
""" Read SAM/BAM format file using iterator. """
def __init__(self, f, regions=False, kind=None):
def __init__(self, f, regions=False, kind=None, samtools_path=None):
ext = None
if samtools_path is None:
samtools_path = "samtools" # Get from the PATH
self.samtools_path = samtools_path
self.spool = None # use this to catch alignment during reader scraping
self.type = 'sam'
try:
Expand Down Expand Up @@ -113,7 +116,7 @@ def _sam_init(self, f):
self._conn = 'file'

def _bam_init(self, f, regions):
pline = ['samtools', 'view', '-H', f.name]
pline = [self.samtools_path, 'view', '-H', f.name]
try:
p = Popen(pline, bufsize=-1, stdout=PIPE,
stderr=PIPE)
Expand All @@ -126,16 +129,16 @@ def _bam_init(self, f, regions):
open(''.join([f.name, '.bai']))
except EnvironmentError:
sys.stderr.write("BAM index not found. Attempting to index file.\n")
index_p = Popen(['samtools', 'index', f.name], stdout=PIPE, stderr=PIPE)
index_p = Popen([self.samtools_path, 'index', f.name], stdout=PIPE, stderr=PIPE)

_, err = index_p.communicate()
if index_p.returncode > 0 or re.search("fail", str(err)):
raise OSError("Indexing failed. Is the BAM file sorted?\n")
else:
sys.stderr.write("Index created successfully.\n")
pline = ['samtools', 'view', f.name, regions]
pline = [self.samtools_path, 'view', f.name, regions]
else:
pline = ['samtools', 'view', f.name]
pline = [self.samtools_path, 'view', f.name]
self.p = Popen(pline, bufsize=-1, stdout=PIPE,
stderr=PIPE)
if PY3:
Expand Down Expand Up @@ -171,7 +174,7 @@ def __len__(self):
if self.type != 'bam':
raise NotImplementedError("len(Reader) is only implemented for bam files.")
elif self.type == 'bam':
return sum(bam_read_count(self._f_name))
return sum(bam_read_count(self._f_name, self.samtools_path))

def subsample(self, n):
""" Draws every nth read from self. Returns Sam. """
Expand Down Expand Up @@ -505,9 +508,11 @@ def tile_region(rname, start, end, step):
if start < end:
yield '%s:%d-%d' % (rname, start, end)

def bam_read_count(bamfile):
def bam_read_count(bamfile, samtools_path=None):
""" Return a tuple of the number of mapped and unmapped reads in a bam file """
p = Popen(['samtools', 'idxstats', bamfile], stdout=PIPE)
if samtools_path is None:
samtools_path = "samtools" # Get from the PATH
p = Popen([samtools_path, 'idxstats', bamfile], stdout=PIPE)
mapped = 0
unmapped = 0
for line in p.stdout:
Expand Down

0 comments on commit 6275130

Please sign in to comment.