Skip to content

Commit

Permalink
first stabs at coaxing python3 biopython to create biounits #16
Browse files Browse the repository at this point in the history
  • Loading branch information
ChrisMoth committed May 12, 2020
1 parent fc5f0c4 commit 80f0300
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 23 deletions.
20 changes: 12 additions & 8 deletions lib/mmCIF_to_biounits.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,11 @@
# from pdbx.reader.PdbxReader import PdbxReader
# from pdbx.writer.PdbxWriter import PdbxWriter
# from pdbx.reader.PdbxContainers import *
import logging
LOGGER = logging.getLogger()

def parseOperationExpression(expression: str) :
LOGGER.debug('Parsing biounit operation %s',str)
operations = []
stops = [ "," , "-" , ")" ]

Expand Down Expand Up @@ -95,6 +98,9 @@ def prepareOperation(mmcif_dict, op1index, op2index) :
return operation

def mmCIF_to_biounits(mmcif_dict):
"""Return a list of mmcif dictionaries for each biounit, and a dictionary of chains copies to new chains"""
biounits_as_mmcif_dicts = []

assembly_ids = mmcif_dict.get('_pdbx_struct_assembly_gen.assembly_id',None)
if not assembly_ids: # We cannot create biounits if there are not assemblies to create
return []
Expand All @@ -115,8 +121,10 @@ def mmCIF_to_biounits(mmcif_dict):
assert len(atom_site_Cartn_y) == atom_site_count,"Cartn_x entries are missing in the structure"
assert len(atom_site_Cartn_z) == atom_site_count,"Cartn_x entries are missing in the structure"

# Create a CIF file for every assembly specified in pdbx_struct_assembly_gen
# Create a CIF dictionary for every assembly specified in pdbx_struct_assembly_gen

for index in range(assembly_count):
LOGGER.info("Processing biounit assembly %d of %d",index,assembly_count)
# Ultimately we want to create transformed lists of atomic coordinates
# setup numpy matrix to receive maximum number of calculation results
# Keep in mind that often many atoms are excluded from biounits
Expand Down Expand Up @@ -176,6 +184,7 @@ def mmCIF_to_biounits(mmcif_dict):

# For every operation in the first parenthesized list
for op1 in oper :
LOGGER.debug("op1=%s",str(op1))
# Find the index of the current operation in the oper_list category table
op1index = 0
for row in range(oper_list_count):
Expand Down Expand Up @@ -235,10 +244,5 @@ def mmCIF_to_biounits(mmcif_dict):
# for key in new_mmcif_dict:
# if '_atom_site.' in key:
# print(key,len(new_mmcif_dict[key]))

from Bio.PDB.mmcifio import MMCIFIO
io = MMCIFIO()
io.set_dict(mmcif_dict)
io.save("/tmp/new_rcox.cif")
print("Success for now")
sys.exit(1)
biounits_as_mmcif_dicts.append(new_mmcif_dict)
return biounits_as_mmcif_dicts
44 changes: 29 additions & 15 deletions lib/tests/test_mmCIF_to_biounits.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,47 @@
"""Very specific tests of transcript alignments"""

import pytest
import sys
import warnings
import logging
import pprint

from Bio.PDB import *
from lib.mmCIF_to_biounits import mmCIF_to_biounits

LOGGER = logging.getLogger()
# warnings.simplefilter('ignore', PDBConstructionWarning)

def test_mmCIF_to_biounits():
"""
6DWU should return two biounit file
def exercise_mmCIF_to_biounits(pdb_id):
LOGGER.info("Attempting biounit creation for %s"%pdb_id)
filename = PDBList().retrieve_pdb_file(pdb_id,file_format='mmCif',pdir='/tmp',overwrite=True)
LOGGER.info("Successful download of %s to %s",pdb_id,filename)
sys.exit(1)
mmcif_parser = MMCIFParser(QUIET=True)
structure_6DWU = mmcif_parser.get_structure('pdb',filename)
biounits_list= mmCIF_to_biounits(mmcif_parser._mmcif_dict)
assert type(biounits_list[0]) == dict
LOGGER.info("%d biounits returned for %s",len(biounits_list),pdb_id)

First, align using the new sifts isoform specific mechanism.
Second, aling with biopython Needleman-Wunsch
Third, attempt a terrible alignment
"""
# Can we save the biounit???
mmcif_io = MMCIFIO()
mmcif_io.set_dict(biounits_list[0])
mmcif_io.save('/tmp/%s1.cif'%pdb_id)
return biounits_list

# Alignment 1: Sifts isoform specific alignment
filename_6DWU = PDBList().retrieve_pdb_file('6DWU',file_format='mmCif',pdir='/tmp',overwrite=True)
mmcif_parser = MMCIFParser(QUIET=True)
structure_6DWU = mmcif_parser.get_structure('6DWU',filename_6DWU)
# import pdb; pdb.set_trace()
biounits = mmCIF_to_biounits(mmcif_parser._mmcif_dict)
assert len(biounits) == 2,"Two biounits should have been created - but only got %d"%len(biounits)

def test_mmCIF_to_biounits():
# biounits_list = exercise_mmCIF_to_biounits('6dw1')
# assert len(biounits) == 2,"Two biounits should have been created - but only got %d"%len(biounits)

biounits_list = exercise_mmCIF_to_biounits('4rkk')
assert len(biounits_list) == 1
biounits_list = exercise_mmCIF_to_biounits('3c70')
assert len(biounits_list) == 1

FORMAT='%(asctime)s %(levelname)-4s [%(filename)16s:%(lineno)d] %(message)s'
logging.basicConfig(format=FORMAT,level=logging.DEBUG)
LOGGER = logging.getLogger()
LOGGER.info("Starting...")
test_mmCIF_to_biounits()


0 comments on commit 80f0300

Please sign in to comment.