diff --git a/README.md b/README.md index 8ddd52fe..bfc8183a 100644 --- a/README.md +++ b/README.md @@ -102,7 +102,13 @@ structures. - Motifs for DrPlotter can also contain 'x' in the dot-bracket notation for *must be unpaired*. ## Version -v0.9 -- standalone package +v0.10 -- moved to beta status (first official release) + * changes in parameter defaults + * bugfix in linalg + * new DrPlotter simulation layout and motif plotting + * repaired code to enable plotting including pause sites + +v0.9 -- standalone package (no official release) * extraction from the [ribolands] package to a standalone Python package. * using scipy and numpy for matrix exponentials (instead of [treekin]) * implemented lookahead to skip pruning of potentially relevant future structures diff --git a/drtransformer/__init__.py b/drtransformer/__init__.py index 2283ace8..6fa9649f 100644 --- a/drtransformer/__init__.py +++ b/drtransformer/__init__.py @@ -7,7 +7,7 @@ # See README for instructions. # -__version__ = "0.9" +__version__ = "0.10" _MIN_VRNA_VERSION = "2.4.13" diff --git a/drtransformer/drtransformer.py b/drtransformer/drtransformer.py index 568701f2..6ea80a49 100755 --- a/drtransformer/drtransformer.py +++ b/drtransformer/drtransformer.py @@ -6,9 +6,7 @@ import os, sys, argparse import math import numpy as np - -# Debugging only -from datetime import datetime +from datetime import datetime # Performance report import RNA from . import __version__ @@ -59,20 +57,20 @@ def parse_drtrafo_args(parser): help = """Write verbose information to a file: {--outdir}/{--name}.log""") - output.add_argument("--tmpdir", default = '', action = 'store', metavar = '', - help = """Specify path for storing landscape files for debugging. These - files will not be removed when the program terminates. """) - output.add_argument("--outdir", default = '', action = 'store', metavar = '', help = """Place regular output files, into this directory. Creates the directory if it does not exist. """) + output.add_argument("--tmpdir", default = '', action = 'store', metavar = '', + help = """Specify path for storing landscape files for debugging. These + files will not be removed when the program terminates. """) + output.add_argument("--no-timecourse", action = "store_true", help = """Do not produce the time-course file (outdir/name.drf).""") output.add_argument("--plot-minh", type = float, default = None, metavar = '', help = """Coarsify the output based on a minimum barrier height. In contrast - to t-fast, this does *not* speed up the simulation.""") + to t-fast, this does *not* affect the accuracy of the model.""") output.add_argument("--t-lin", type = int, default = 30, metavar = '', help = """Evenly space output *--t-lin* times during transcription on a linear time scale.""") @@ -80,11 +78,15 @@ def parse_drtrafo_args(parser): output.add_argument("--t-log", type = int, default = 300, metavar = '', help = """Evenly space output *--t-log* times after transcription on a logarithmic time scale.""") + output.add_argument("--performance-report", action = "store_true", + # prints datetimes and statprof data for performance analysis + help = argparse.SUPPRESS) + ############################ # Transcription parameters # ############################ trans.add_argument("--t-ext", type = float, default = 0.02, metavar = '', - help = """Transcription speed, i.e. time per nucleotide extension + help = """Inverse of transcription rate, i.e. time per nucleotide extension [seconds per nucleotide].""") trans.add_argument("--t-end", type = float, default = 3600, metavar = '', @@ -127,14 +129,14 @@ def parse_drtrafo_args(parser): algo.add_argument("--fpwm", type = int, default = 4, metavar = '', help = """Findpath search width multiplier. This value times the base-pair distance results in the findpath search width. Higher - values increase the chances to find energetically better transition - state energies.""") + values increase the chances to find energetically lower saddle + energies for the transition. """) algo.add_argument("--mfree", type = int, default = 6, metavar = '', help = """Minimum number of freed bases during helix fraying. Fraying helices can vary greatly in length, starting with at least two base-pairs. This parameter defines the minimum amount of - bases freed by helix fraying. For example, 6 corresponds to a + bases freed by helix fraying. For example, 6 can correspond to a stack of two base-pairs and a loop region of 2 nucleotides. If less bases are freed and there exists a nested stacked helix, this helix is considered to fray as well.""") @@ -370,8 +372,9 @@ def main(): # ~~~~~~~~~ # ############# - #import statprof - #statprof.start() + if args.performance_report: + import statprof + statprof.start() time = 0 for tlen in range(args.start, args.stop+1): @@ -380,7 +383,7 @@ def main(): itime = datetime.now() # Get new nodes and connect them. - nn, on = TL.expand() + nn, on, prep = TL.expand(args.performance_report) logger.info(f'After expansion: {len(list(TL.active_nodes)):3d} active structures, {len(list(TL.inactive_nodes)):3d} inactive structures.' + f' (Found {len(nn)} new nodes and revisited {len(on)} pruned nodes.)') etime = datetime.now() @@ -487,19 +490,21 @@ def main(): if args.o_prune > 0: delth = args.delth pn, dn = TL.prune(args.o_prune, delth, keep) - logger.info(f'After pruning: {len(list(TL.active_local_mins)):3d} active lmins, {len(list(TL.active_nodes)):3d} active structures, {len(list(TL.inactive_nodes)):3d} inactive structures.\n' + - f' (Pruned {len(pn)} nodes with combined occupancy below {args.o_prune:.2f}, kept {len(keep)} nodes due to lookahead, deleted {len(dn)} nodes inactive for {delth} transcription steps.)') + logger.info(f'After pruning: {len(list(TL.active_local_mins)):3d} active lmins,' + + f' {len(list(TL.active_nodes)):3d} active structures, {len(list(TL.inactive_nodes)):3d} inactive structures.' + + f' (Pruned {len(pn)} nodes, kept {len(keep)} nodes due to lookahead, deleted {len(dn)} inactive nodes.)') ptime = datetime.now() - if args.verbose: - exptime = (etime - itime).total_seconds() - cgntime = (ctime - etime).total_seconds() - simtime = (stime - ctime).total_seconds() - prntime = (ptime - stime).total_seconds() - tottime = (ptime - itime).total_seconds() - logger.info(f'{tlen=}, {tottime=}, {exptime=}, {cgntime=}, {simtime=}, {prntime=}.') - #print(tlen, tottime, exptime, cgntime, simtime, prntime) - #sys.stdout.flush() + exptime = (etime - itime).total_seconds() + cgntime = (ctime - etime).total_seconds() + simtime = (stime - ctime).total_seconds() + prntime = (ptime - stime).total_seconds() + tottime = (ptime - itime).total_seconds() + logger.info(f'{tlen=}, {tottime=}, {exptime=}, {cgntime=}, {simtime=}, {prntime=}.') + if args.performance_report: + (exptime, frayytime, guidetime, floodtime) = prep + print(tlen, tottime, frayytime, guidetime, floodtime, exptime, cgntime, simtime, prntime) + sys.stdout.flush() # Write the last results if args.stdout == 'log' or lfh: @@ -509,7 +514,9 @@ def main(): lnodes, pX = TL.get_occupancies() if args.plot_minh: for e, node in enumerate(snodes): - if node not in plot_cgm: #TODO check about lnodes + if node not in lnodes: + continue + if node not in plot_cgm: continue ne = TL.nodes[node]['energy']/100 no = p8[e] + sum(p8[snodes.index(n)]/len(mapping[n]) for n in plot_cgm[node]) @@ -518,7 +525,7 @@ def main(): nids = [TL.nodes[n]['identity'] for n in sorted(plot_cgm[node], key = lambda x: TL.nodes[x]['identity'])] if nids: ni += f" + {' + '.join(nids)}" - fdata += f"{tlen:4d} {e+1:4d} {node[:tlen]} {ne:6.2f} {no:6.4f} (eq: {eo:6.4f}) ID = {ni}\n" + fdata += f"{tlen:4d} {e+1:4d} {node[:tlen]} {ne:6.2f} {no:6.4f} [-\u221e-> {eo:6.4f}] ID = {ni}\n" write_output(fdata, stdout = (args.stdout == 'log'), fh = lfh) else: for e, node in enumerate(snodes): @@ -531,12 +538,14 @@ def main(): fdata += f"{tlen:4d} {e:4d} {node[:tlen]} {ne:6.2f} {no:6.4f} [-\u221e-> {eo:6.4f}] ID = {ni}\n" write_output(fdata, stdout=(args.stdout == 'log'), fh = lfh) - #statprof.stop() - #statprof.display() - # CLEANUP file handles if lfh: lfh.close() if dfh: dfh.close() + + if args.performance_report: + statprof.stop() + statprof.display() + return if __name__ == '__main__': diff --git a/drtransformer/landscape.py b/drtransformer/landscape.py index d5fe8fd3..0786cd78 100644 --- a/drtransformer/landscape.py +++ b/drtransformer/landscape.py @@ -179,7 +179,7 @@ def __repr__(self): # Algorithmic part # # ================ # - def expand(self): + def expand(self, performance_report = False): """ Find new secondary structures and determine their neighborhood in the landscape. The function adds to types of new structures: @@ -210,6 +210,7 @@ def expand(self): self.addnode(mfess, structure = mfess, occupancy = 1) nn = set([mfess]) on = set() + pr = (0, 0, 0, 0) if performance_report else None else: md = self.md fpwm = self.fpwm @@ -286,9 +287,9 @@ def expand(self): guidetime = (g_time - f_time).total_seconds() floodtime = (l_time - g_time).total_seconds() tottime = (l_time - i_time).total_seconds() - #print(len(seq), tottime, frayytime, guidetime, floodtime) - #sys.stdout.flush() - return nn, on + drlog.debug(f'{len(seq)=}, {tottime=}, {frayytime=}, {guidetime=}, {floodtime=}.') + pr = (tottime, frayytime, guidetime, floodtime) if performance_report else None + return nn, on, pr def get_coarse_network(self): """ Produce a smaller graph of local minima and best connections. diff --git a/drtransformer/linalg.py b/drtransformer/linalg.py index bcdc6476..02159d58 100644 --- a/drtransformer/linalg.py +++ b/drtransformer/linalg.py @@ -53,7 +53,7 @@ def get_p8_detbal(A): else: p8[j] = p8[i] * (A[i][j] / A[j][i]) count += 1 - assert 0 <= p8[j] <= p8[0], "unreasonable equilibrium occupancy in p8" + assert 0 <= p8[j] <= p8[0] or np.isclose(p8[j], p8[0]), f"unreasonable equilibrium occupancy in p8: {j}, {p8[j]}" # Given i, we have looped through all j done[i] = 1 # Determine next i for solving @@ -144,7 +144,7 @@ def mx_simulate(A, p0, times, force = None, atol = 1e-8, rtol = 1e-12): last = -1 try: p8 = get_p8_detbal(A) - drlog.info(f"Equilibrium distribution vector p8 ({sum(p8)=}):\n{mx_print([p8])}") + drlog.debug(f"Equilibrium distribution vector p8 ({sum(p8)=}):\n{mx_print([p8])}") if not all(p > 0 for p in p8): raise MxLinalgError(f"Negative elements in p8 ({p8=})") @@ -183,24 +183,24 @@ def mx_simulate(A, p0, times, force = None, atol = 1e-8, rtol = 1e-12): yield t, np.absolute(pt/sum(np.absolute(pt))) last = t if np.allclose(pt, p8): # No need to calculate any more timepoints! - drlog.info(f'Equilibrium reached at time {t=}.') + drlog.debug(f'Equilibrium reached at time {t=}.') for ft in force: if ft > t: yield ft, p8 return except MxLinalgError as err: - drlog.info(f"Exception due to Error: {str(err)}") - R = R.T + drlog.debug(f"Exception due to Error: {str(err)}") + A = A.T for t in times: if t <= last: continue - pt = sl.expm(R * t).dot(p0) + pt = sl.expm(A * t).dot(p0) if not np.isclose(sum(pt), 1., rtol = rtol, atol = atol): raise MxLinalgError('Unstable simulation at time {t=}: {sum(pt)=}') yield t, np.absolute(pt/sum(np.absolute(pt))) if np.allclose(pt, p8): # No need to calculate any more timepoints! - drlog.info(f'Equilibrium reached at time {t=}.') + drlog.debug(f'Equilibrium reached at time {t=}.') for ft in force: if ft > t: yield ft, p8 diff --git a/setup.py b/setup.py index f3207e74..72831878 100755 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name = 'drtransformer', - version = '0.9', + version = '0.10', description = 'Heuristic cotranscriptional folding using the nearest neighbor energy model.', long_description = LONG_DESCRIPTION, long_description_content_type = 'text/markdown', @@ -17,7 +17,7 @@ url = 'https://github.com/bad-ants-fleet/drtransformer', license = 'ViennaRNA', classifiers = [ - 'Development Status :: 3 - Alpha', + 'Development Status :: 4 - Beta', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Intended Audience :: Science/Research',