Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add working blastpdb protocol #75

Open
wants to merge 2 commits into
base: devel
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions prody2/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,7 @@ def getProDyEnvName(version):
PWALIGN = 1 # biopython pwalign local pairwise sequence alignment after trivial mapping
CEALIGN = 2 # combinatorial extension (CE) as in PyMOL
DEFAULT = 3 # try pwalign then CE

UNITE_CHAINS_LABEL = "Unite chains in mmCIF segments"
UNITE_CHAINS_HELP = ('Elect whether to unite chains in mmCIF segments for each structure like ChimeraX. '
'Default is **False**, which means the smaller unit IDs are used for chains like PyMOL.')
65 changes: 65 additions & 0 deletions prody2/objects/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,3 +502,68 @@ def loadAndWriteEnsemble(cls):
objLabel=cls.ens.getLabels()[j],
weight=weights[j])
cls.npz.append(frame)


hitEntryTypes = {
'num': String,
'bit-score': Float,
'score': Float,
'evalue': Float,
'query-from': Integer,
'query-to': Integer,
'hit-from': Integer,
'hit-to': Integer,
'query-frame': Integer,
'hit-frame': Integer,
'identity': Integer,
'positive': Integer,
'gaps': Integer,
'align-len': Integer,
'qseq': String,
'hseq': String,
'midline': String,
'query-len': Integer,
'percent_identity': Float,
'percent_coverage': Float,
'pdb_id': String,
'chain_id': String,
'title': String
}

class BlastHit(EMObject):
"""Represents each hit from blastPDB"""

def __init__(self, key=None, hit=None, **kwargs):
"""
Params:
:param hit: a hit from the BlastRecord
"""
EMObject.__init__(self, **kwargs)

self._key = String(key)
for key, value in hit.items():
persistType = hitEntryTypes[key]
setattr(self, key.replace('-', '_'),
persistType(value))

class SetOfBlastHits(EMSet):
""" Represents a set of Blast hits"""
ITEM_TYPE = BlastHit

def createSetOfBlastResults(blastRec, protocol, percentId=0.0,
percentOverlap=0.0, chain=False):
"""Create Scipion SetOfBlastResults from ProDy BlastRecord *blastRec*

The hits can be filtered by providing *percent_identity* and *percent_overlap*
to getHits. The argument *chain* specifies whether to return individual chains
as separate hits (default False).
"""
hitsSet = SetOfBlastHits().create(protocol._getExtraPath())
for key, value in blastRec.getHits(percent_identity=percentId,
percent_overlap=percentOverlap,
chain=chain).items():
hit = BlastHit(key, value)
hitsSet.append(hit)

return hitsSet

2 changes: 2 additions & 0 deletions prody2/protocols/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,5 @@
from .protocol_pdbfixer import ProDyPDBFixer
from .protocol_clustenm import ProDyClustENM
from .protocol_bioexcel import ProDyBioExcelCV19

from .protocol_blastpdb import ProDyBlastPDB
4 changes: 1 addition & 3 deletions prody2/protocols/protocol_atoms.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,7 @@
def notFoundException(inputFn):
return Exception("Atomic structure not found at *%s*" % inputFn)

UNITE_CHAINS_LABEL = "Unite chains in mmCIF segments"
UNITE_CHAINS_HELP = ('Elect whether to unite chains in mmCIF segments for each structure like ChimeraX. '
'Default is **False**, which means the smaller unit IDs are used for chains like PyMOL.')
from prody2.constants import UNITE_CHAINS_LABEL, UNITE_CHAINS_HELP

IMPORT_FROM_ID_CONDITION = 'inputPdbData == IMPORT_FROM_ID'
SUMMARY_NO_OUTPUT = 'Output structure not ready yet'
Expand Down
144 changes: 144 additions & 0 deletions prody2/protocols/protocol_blastpdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# -*- coding: utf-8 -*-
# **************************************************************************
# *
# * Authors: James Krieger (jmkrieger@cnb.csic.es)
# *
# * Centro Nacional de Biotecnologia, CSIC
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307 USA
# *
# * All comments concerning this program package may be sent to the
# * e-mail address 'scipion@cnb.csic.es'
# *
# **************************************************************************


"""
This module will provide the ProDy interface for running an NCBI BLAST
search against the PDB
"""
from pwem.protocols import EMProtocol
from pyworkflow.protocol import params

import prody

from prody2.objects import SetOfBlastHits, createSetOfBlastResults
from prody2.constants import UNITE_CHAINS_LABEL, UNITE_CHAINS_HELP
from prody2 import fixVerbositySecondary, restoreVerbositySecondary


class ProDyBlastPDB(EMProtocol):
"""
This module will provide the ProDy interface for running an
NCBI BLAST search against the PDB
"""
_label = 'BlastPDB'
_possibleOutputs = {'outputResults': SetOfBlastHits}

IMPORT_FROM_STRUCT = 0
IMPORT_FROM_SEQ = 1
USE_TEXT = 2

# -------------------------- DEFINE param functions ----------------------
def _defineParams(self, form):
""" Define the input parameters that will be used.
Params:
form: this is the form to be populated with sections and params.
"""
form.addSection(label='ProDy BlastPDB parsing')

form.addParam('inputSeqData', params.EnumParam, choices=['AtomStruct', 'Sequence', 'text'],
label="Import sequence from",
default=self.USE_TEXT,
display=params.EnumParam.DISPLAY_HLIST,
help='Extract sequence from AtomStruct, Sequence or text')

form.addParam('inputStructure', params.PointerParam, label="Input structure",
condition='inputSeqData == IMPORT_FROM_STRUCT',
pointerClass='AtomStruct',
help='Provide any atomic structure with a sequence')

form.addParam('uniteChains', params.BooleanParam, default=False,
condition='inputSeqData == IMPORT_FROM_STRUCT',
label=UNITE_CHAINS_LABEL,
help=UNITE_CHAINS_HELP)

form.addParam('inputSequence', params.PointerParam, label="Input sequence",
condition='inputSeqData == IMPORT_FROM_SEQ',
pointerClass='Sequence',
help='Provide any Sequence object')

form.addParam('inputSeqText', params.TextParam, width=50,
condition='inputSeqData == USE_TEXT', default="",
label='Input sequence',
help='Defined order of chains from custom matching.')

form.addParam('seqid', params.FloatParam, default=0.,
label="Sequence identity percentage cutoff",
help='Hits with lower percent sequence identity will not be included.\n'
'This can be a number between 0 and 100')

form.addParam('overlap', params.FloatParam, default=0.,
label="Overlap percentage cutoff",
help='Hits with lower percent sequence coverage will not be included.\n'
'This can be a number between 0 and 100')

form.addParam('separateChains', params.BooleanParam, default=False,
label="Whether to separate chains",
help="If true, a separate entry will be provided for each chain")

# --------------------------- STEPS functions ------------------------------
def _insertAllSteps(self):
# Insert processing steps
self._insertFunctionStep('computeStep')
self._insertFunctionStep('createOutputStep')

def computeStep(self):
"""Run blastPDB and save the results to an xml file"""

if self.inputSeqData.get() == self.IMPORT_FROM_STRUCT:

fixVerbositySecondary(self)
ag = prody.parsePDB(self.inputStructure.get().getFileName(),
unite_chains=self.uniteChains.get())
restoreVerbositySecondary(self)

sequence = ag.ca.getSequence()

elif self.inputSeqData.get() == self.IMPORT_FROM_SEQ:
sequence = self.inputSequence.get().getSequence()

else:
sequence = self.inputSeqText.get()

self.blastRec = prody.blastPDB(sequence=sequence,
filename=self._getExtraPath('results.xml'),
timeout=1e10) # ensure completion

def createOutputStep(self):
outputHitsSet = createSetOfBlastResults(self.blastRec, self,
self.seqid.get(),
self.overlap.get(),
self.separateChains.get())
self._defineOutputs(outputResults=outputHitsSet)

def _summary(self):
if not hasattr(self, 'outputResults'):
summ = ['Output results are not ready yet']
else:
summ = ['blastPDB retrieved *{0}* hits with the input criteria'.format(
len(self.outputResults))]
return summ
4 changes: 2 additions & 2 deletions prody2/viewers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
from pwem.viewers import DataViewer, showj
from pwem.viewers.viewers_data import RegistryViewerConfig
from prody2.objects import (SetOfTrajFrames, SetOfAtoms,
SetOfClassesTraj)
SetOfClassesTraj, SetOfBlastHits)
DataViewer._targets.extend([SetOfTrajFrames, SetOfAtoms,
SetOfClassesTraj])
SetOfClassesTraj, SetOfBlastHits])

RegistryViewerConfig.registerConfig(SetOfTrajFrames,
{showj.ORDER: 'id enabled label _filename ',
Expand Down