diff --git a/lib/mmCIF_to_biounits.py b/lib/mmCIF_to_biounits.py index 243ec8c..294b381 100644 --- a/lib/mmCIF_to_biounits.py +++ b/lib/mmCIF_to_biounits.py @@ -20,8 +20,11 @@ # from pdbx.reader.PdbxReader import PdbxReader # from pdbx.writer.PdbxWriter import PdbxWriter # from pdbx.reader.PdbxContainers import * +import logging +LOGGER = logging.getLogger() def parseOperationExpression(expression: str) : + LOGGER.debug('Parsing biounit operation %s',str) operations = [] stops = [ "," , "-" , ")" ] @@ -95,6 +98,9 @@ def prepareOperation(mmcif_dict, op1index, op2index) : return operation def mmCIF_to_biounits(mmcif_dict): + """Return a list of mmcif dictionaries for each biounit, and a dictionary of chains copies to new chains""" + biounits_as_mmcif_dicts = [] + assembly_ids = mmcif_dict.get('_pdbx_struct_assembly_gen.assembly_id',None) if not assembly_ids: # We cannot create biounits if there are not assemblies to create return [] @@ -115,8 +121,10 @@ def mmCIF_to_biounits(mmcif_dict): assert len(atom_site_Cartn_y) == atom_site_count,"Cartn_x entries are missing in the structure" assert len(atom_site_Cartn_z) == atom_site_count,"Cartn_x entries are missing in the structure" - # Create a CIF file for every assembly specified in pdbx_struct_assembly_gen + # Create a CIF dictionary for every assembly specified in pdbx_struct_assembly_gen + for index in range(assembly_count): + LOGGER.info("Processing biounit assembly %d of %d",index,assembly_count) # Ultimately we want to create transformed lists of atomic coordinates # setup numpy matrix to receive maximum number of calculation results # Keep in mind that often many atoms are excluded from biounits @@ -176,6 +184,7 @@ def mmCIF_to_biounits(mmcif_dict): # For every operation in the first parenthesized list for op1 in oper : + LOGGER.debug("op1=%s",str(op1)) # Find the index of the current operation in the oper_list category table op1index = 0 for row in range(oper_list_count): @@ -235,10 +244,5 @@ def mmCIF_to_biounits(mmcif_dict): # for key in new_mmcif_dict: # if '_atom_site.' in key: # print(key,len(new_mmcif_dict[key])) - - from Bio.PDB.mmcifio import MMCIFIO - io = MMCIFIO() - io.set_dict(mmcif_dict) - io.save("/tmp/new_rcox.cif") - print("Success for now") - sys.exit(1) + biounits_as_mmcif_dicts.append(new_mmcif_dict) + return biounits_as_mmcif_dicts diff --git a/lib/tests/test_mmCIF_to_biounits.py b/lib/tests/test_mmCIF_to_biounits.py index 8963802..8c82845 100755 --- a/lib/tests/test_mmCIF_to_biounits.py +++ b/lib/tests/test_mmCIF_to_biounits.py @@ -2,6 +2,7 @@ """Very specific tests of transcript alignments""" import pytest +import sys import warnings import logging import pprint @@ -9,26 +10,39 @@ from Bio.PDB import * from lib.mmCIF_to_biounits import mmCIF_to_biounits -LOGGER = logging.getLogger() # warnings.simplefilter('ignore', PDBConstructionWarning) -def test_mmCIF_to_biounits(): - """ - 6DWU should return two biounit file +def exercise_mmCIF_to_biounits(pdb_id): + LOGGER.info("Attempting biounit creation for %s"%pdb_id) + filename = PDBList().retrieve_pdb_file(pdb_id,file_format='mmCif',pdir='/tmp',overwrite=True) + LOGGER.info("Successful download of %s to %s",pdb_id,filename) + sys.exit(1) + mmcif_parser = MMCIFParser(QUIET=True) + structure_6DWU = mmcif_parser.get_structure('pdb',filename) + biounits_list= mmCIF_to_biounits(mmcif_parser._mmcif_dict) + assert type(biounits_list[0]) == dict + LOGGER.info("%d biounits returned for %s",len(biounits_list),pdb_id) - First, align using the new sifts isoform specific mechanism. - Second, aling with biopython Needleman-Wunsch - Third, attempt a terrible alignment - """ + # Can we save the biounit??? + mmcif_io = MMCIFIO() + mmcif_io.set_dict(biounits_list[0]) + mmcif_io.save('/tmp/%s1.cif'%pdb_id) + return biounits_list - # Alignment 1: Sifts isoform specific alignment - filename_6DWU = PDBList().retrieve_pdb_file('6DWU',file_format='mmCif',pdir='/tmp',overwrite=True) - mmcif_parser = MMCIFParser(QUIET=True) - structure_6DWU = mmcif_parser.get_structure('6DWU',filename_6DWU) - # import pdb; pdb.set_trace() - biounits = mmCIF_to_biounits(mmcif_parser._mmcif_dict) - assert len(biounits) == 2,"Two biounits should have been created - but only got %d"%len(biounits) +def test_mmCIF_to_biounits(): + # biounits_list = exercise_mmCIF_to_biounits('6dw1') + # assert len(biounits) == 2,"Two biounits should have been created - but only got %d"%len(biounits) + + biounits_list = exercise_mmCIF_to_biounits('4rkk') + assert len(biounits_list) == 1 + biounits_list = exercise_mmCIF_to_biounits('3c70') + assert len(biounits_list) == 1 +FORMAT='%(asctime)s %(levelname)-4s [%(filename)16s:%(lineno)d] %(message)s' +logging.basicConfig(format=FORMAT,level=logging.DEBUG) +LOGGER = logging.getLogger() +LOGGER.info("Starting...") test_mmCIF_to_biounits() +