Make the description of rMETL usage more user-friendly.

tjiangHIT · May 7, 2019 · 5ebc05f · 5ebc05f
1 parent f401c7f
commit 5ebc05f
Show file tree

Hide file tree

Showing 5 changed files with 31 additions and 29 deletions.
diff --git a/src/rMETL.py b/src/rMETL.py
@@ -33,9 +33,10 @@
   STAGE is one of
     detection    Inference of putative MEI loci.
     realignment  Realignment of chimeric read parts.
-    calling      Mobile Element Insertion calling.
+    calling      Mobile Element Insertion/Deletion calling.
     
   See README.md for documentation or --help for details
+  Strongly recommend making output directory manually at first.
   
   rMETL V%s
   Author: %s

diff --git a/src/rMETL_MEIcalling.py b/src/rMETL_MEIcalling.py
@@ -32,9 +32,10 @@
 
     rMETL - realignment-based Mobile Element insertion detection Tool for Long read
 
-	rMETL MEI calling.
-
-	Optional output format: .bed or .vcf
+	Generate final MEI/MED callset in bed or vcf file.
+	
+	The output file called 'calling.bed' or 'calling.vcf'
+	stores in output directory.
 	
 	rMETL V%s
 	Author: %s
@@ -313,12 +314,12 @@ def call_vcf(args):
 def parseArgs(argv):
 	parser = argparse.ArgumentParser(prog="rMETL.py calling", description=USAGE, \
 		formatter_class=argparse.RawDescriptionHelpFormatter)
-	parser.add_argument("input", metavar="SAM", type=str, help="Input cluster.sam.")
+	parser.add_argument("input", metavar="SAM", type=str, help="Input cluster.sam on STAGE realignment.")
 	parser.add_argument("Reference", metavar="REFERENCE", type=str, \
-		help="The reference genome(fasta format).")
+		help="The reference genome in fasta format.")
 	parser.add_argument("format", metavar="[BED,VCF]", type=str, \
 		help="The format of the output file. [%(default)s]", default = "bed")
-	parser.add_argument('output', type=str, help = "Prefix of final call set.")
+	parser.add_argument('output', type=str, help = "Directory to output final callset.")
 	parser.add_argument('-hom', '--homozygous', \
 		help = "The mininum score of a genotyping reported as a homozygous.[%(default)s]", \
 		default = 0.8, type = float)
@@ -330,7 +331,7 @@ def parseArgs(argv):
 	parser.add_argument('-c', '--clipping_threshold', \
 		help = "Mininum threshold of realignment clipping.[%(default)s]", \
 		default = 0.5, type = float)
-	parser.add_argument('--sample', help = "The name of the sample that is noted.", \
+	parser.add_argument('--sample', help = "Sample description", \
 		default = "None", type = str)
 	parser.add_argument('--MEI', help = "Enables rMETL to display MEI/MED only.[%(default)s]", \
 		default = "True", type = str)
@@ -346,7 +347,7 @@ def run(argv):
     elif args.format == "vcf":
     	call_vcf(args)
     else:
-    	logging.error("The format is available.")
+    	logging.error("Invalid format.")
     	exit(1)
     logging.info("Finished in %0.2f seconds."%(time.time() - starttime))
 

diff --git a/src/rMETL_extraction.py b/src/rMETL_extraction.py
@@ -37,19 +37,19 @@
     rMETL - realignment-based Mobile Element insertion detection Tool for Long read
 
 
-	Map reads using NGMLR and Samtools to produce .bam file.
+	Support reads aligned with Ngmlr and sorted with Samtools
 
-	If input is a .fastq, or .fasta, we do the initial mapping
-	for you all at once.
+	If input is a fastq or fasta format file, rMETL generates
+	alignments with Ngmlr at first;
 
-	If input is a .sam, we convert and sort it to be a bam, 
-	and then make an index for it.  
+	If input is a sam format file, rMETL converts and sorts it
+	to be a bam format file;
 
-	If your input is a .bam, we extract the ME signatures and
-	collect the sub-sequence of them.
+	If your input is a bam format file with index, rMETL extracts
+	the ME signatures and collects the sub-sequence of them.
 
-	The output is a .fasta file contains potentials non-reference
-	ME clusters.
+	The output is a fasta format file called 'potential.fa' 
+	contains potentials non-reference ME clusters.
 
 	rMETL V%s
 	Author: %s
@@ -428,7 +428,7 @@ def single_pipe(out_path, chr, bam_path, low_bandary, evidence_read, SV_size):
 	'''
 	samfile = pysam.AlignmentFile(bam_path)
 	CLIP_note = dict()
-	logging.info("Resolving the chromsome %s."%(chr))
+	logging.info("Resolving chromsome %s."%(chr))
 	if chr not in CLIP_note:
 		CLIP_note[chr] = dict()
 	cluster_pos_INS = list()
@@ -455,7 +455,7 @@ def single_pipe(out_path, chr, bam_path, low_bandary, evidence_read, SV_size):
 			SV_size, low_bandary)
 		del cluster_pos_DEL
 		gc.collect()
-	logging.info("%d MEI signal locuses in the chromsome %s."%(len(Cluster_INS)+\
+	logging.info("%d MEI/MED signal loci in the chromsome %s."%(len(Cluster_INS)+\
 		len(Cluster_DEL), chr))
 	combine_result(add_genotype(Cluster_INS, samfile, low_bandary), \
 		add_genotype(Cluster_DEL, samfile, low_bandary), out_path, chr)
@@ -528,12 +528,12 @@ def parseArgs(argv):
 	parser = argparse.ArgumentParser(prog="rMETL.py detection", \
 		description=USAGE, formatter_class=argparse.RawDescriptionHelpFormatter)
 	parser.add_argument("input", metavar="[SAM,BAM,FASTA,FASTQ]", type=str, \
-		help="Input [Mapped/Unmapped] reads.")
+		help="Input reads with/without alignment.")
 	parser.add_argument("Reference", metavar="REFERENCE", type=str, \
-		help="The reference genome (fasta format).")
+		help="The reference genome in fasta format.")
 	parser.add_argument('temp_dir', type=str, \
 		help = "Temporary directory to use for distributed jobs.")
-	parser.add_argument('output', type=str, \
+	parser.add_argument('output_dir', type=str, \
 		help = "Directory to output potential ME loci.")
 	parser.add_argument('-s', '--min_support',\
 	 help = "Mininum number of reads that support a ME.[%(default)s]", \
@@ -542,7 +542,7 @@ def parseArgs(argv):
 		help = "Mininum length of ME to be reported.[%(default)s]", \
 		default = 50, type = int)
 	parser.add_argument('-d', '--min_distance', \
-		help = "Mininum distance of two ME clusters to be intergrated.[%(default)s]", \
+		help = "Mininum distance of two ME signatures to be intergrated.[%(default)s]", \
 		default = 20, type = int)
 	parser.add_argument('-t', '--threads', \
 		help = "Number of threads to use.[%(default)s]", default = 8, \

diff --git a/src/rMETL_realign.py b/src/rMETL_realign.py
@@ -34,7 +34,7 @@
 	TE refs: Alu concensus
 		 L1 concensus
 		 SVA concensus
-	The output of this script is a .sam file.
+	The output is a sam format file called 'cluster.sam'.
 
 	rMETL V%s
 	Author: %s
@@ -69,9 +69,9 @@ def call_ngmlr(inFile, ref, presets, nproc, outFile, SUBREAD_LENGTH, SUBREAD_COR
 def parseArgs(argv):
 	parser = argparse.ArgumentParser(prog="rMETL.py realignment", description=USAGE, \
 		formatter_class=argparse.RawDescriptionHelpFormatter)
-	parser.add_argument("input", metavar="FASTA", type=str, help="Input potential_ME.fa.")
-	parser.add_argument("ME_Ref", type=str, help="The reference genome(fasta format).")
-	parser.add_argument('output', type=str, help = "Prefix of potential ME classification.")
+	parser.add_argument("input", metavar="FASTA", type=str, help="Input potential_ME.fa on STAGE detection.")
+	parser.add_argument("ME_Ref", type=str, help="The transposable element concensus in fasta format.")
+	parser.add_argument('output', type=str, help = "Directory to output realignments.")
 	parser.add_argument('-t', '--threads', help = "Number of threads to use.[%(default)s]", \
 		default = 8, type = int)
 	parser.add_argument('-x', '--presets', \

diff --git a/src/rMETL_version.py b/src/rMETL_version.py
@@ -1,5 +1,5 @@
 # * @author: Jiang Tao (tjiang@hit.edu.cn)
 
-__version__ = '1.0.2'
+__version__ = '1.0.3'
 __author__ = 'Jiang Tao'
 __contact__ = 'tjiang@hit.edu.cn'