From b99b1445aa6be04346a246ac900a717634d981ce Mon Sep 17 00:00:00 2001 From: Xavier Robin Date: Thu, 28 Mar 2019 13:15:26 +0100 Subject: [PATCH] Read gzipped mmCIF files as input --- scripts/cif2fasta.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/scripts/cif2fasta.py b/scripts/cif2fasta.py index 7edd23c7..27d172ee 100755 --- a/scripts/cif2fasta.py +++ b/scripts/cif2fasta.py @@ -6,7 +6,8 @@ @author: Harald Voehringer """ -import sys, os, glob, textwrap, itertools +from __future__ import print_function +import sys, os, glob, gzip, textwrap, itertools from optparse import OptionParser from collections import defaultdict from os.path import splitext @@ -43,17 +44,22 @@ def __init__(self, cif_path): self.block = self.open_cif() def open_cif(self): - """ Assumes a mmCif file and returns a data block used for subsequent procedures. """ + """ Assumes a mmCIF or gzipped mmCIF file and returns a data block used for subsequent procedures. """ # The "usual" procedure to open a mmCIF with pdbX/mmCIF - with open(self.cif_path) as cif_fh: - data = [] - reader = PdbxReader(cif_fh) - reader.read(data) - if len(data) == 0: - return None - else: - return data[0] + data = [] + try: + with gzip.open(self.cif_path) as cif_fh: + reader = PdbxReader(cif_fh) + reader.read(data) + except IOError: + with open(self.cif_path) as cif_fh: + reader = PdbxReader(cif_fh) + reader.read(data) + if len(data) == 0: + return None + else: + return data[0] def is_valid(self): return self.block is not None @@ -496,7 +502,7 @@ def get_paths(in_folder, out_folder): for root, dirs, files in os.walk(in_folder): for fname in files: - if fname.endswith(".cif"): + if fname.endswith(".cif") or fname.endswith(".cif.gz"): in_path = os.path.join(root, fname) out_file = in_path.split('/')[-1].split('.')[0] + ".fasta" out_path = os.path.join(out_folder, out_file)