-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun_parse_SignalP.pl
61 lines (46 loc) · 1.45 KB
/
run_parse_SignalP.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/perl -w
#
# MODULE 4
# SIGNAL PEPTIDE prediction
#
# Version 1.0 (August 29, 2014)
#
# Juan Carlos González Sánchez
# Centro Andaluz de Biología del Desarrollo (CABD)
# Universidad Pablo de Olavide, Sevilla
#
#
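# Usage: run_parse_SignalP.pl <fasta> <speciesname> <signalp_path>
#   fasta         input FASTA file passed to SignalP
#   speciesname   label used to build the output file names
#   signalp_path  directory that contains the `signalp` executable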
use strict;
use warnings;
use Cwd;
use Getopt::Long;
use IO::Compress::Gzip;
# DEFINE VARIABLES
# Command-line arguments:
#   $ARGV[0]  FASTA file handed to SignalP
#   $ARGV[1]  label used to build both output file names
#   $ARGV[2]  directory that contains the `signalp` executable
die "Usage: $0 fasta speciesname signalp_path\n" if @ARGV < 3;
my $input_file   = $ARGV[0];
my $output_name  = $ARGV[1];
my $signalp_path = $ARGV[2];

#Output files
my $signalp_output = "SignalP_raw_results_$output_name" . ".tsv";
my $parsed_output  = "02_SP_$output_name" . ".tsv";

## 1. RUN SIGNALP
# The list form of the pipe open bypasses the shell, so spaces or shell
# metacharacters in the arguments cannot break (or hijack) the command line.
my $signalp_bin = "$signalp_path/signalp";
open(my $signalp_fh, '-|', $signalp_bin, '-f', 'short', $input_file)
    or die "Cannot run $signalp_bin: $!\n";
open(my $raw_fh, '>', $signalp_output)
    or die "Cannot write $signalp_output: $!\n";
while (my $raw_line = <$signalp_fh>) {
    print {$raw_fh} $raw_line;
}
close($raw_fh) or die "Cannot close $signalp_output: $!\n";
# close() on a pipe returns false when the child exited with a non-zero status;
# in that case $! is 0 and the status is in $?.
close($signalp_fh)
    or die $!
        ? "Error reading from $signalp_bin: $!\n"
        : "$signalp_bin exited with status " . ($? >> 8) . "\n";

# Keep only a compressed copy of the raw results.
system('gzip', '-f', $signalp_output) == 0
    or die "gzip -f $signalp_output failed (status " . ($? >> 8) . ")\n";

# 2. PARSE results
open(my $in_fh, '-|', 'gzip', '-dcf', "$signalp_output.gz")
    or die "Cannot read $signalp_output.gz: $!\n";
open(my $out_fh, '>', $parsed_output)
    or die "Cannot write $parsed_output: $!\n";
print {$out_fh} "#Query\tSignal_Peptide\n";

# Columns of a result row, in order:
#   name Cmax pos Ymax pos Smax pos Smean D ? Dmaxcut Networks-used
LINE:
while (my $line = <$in_fh>) {
    next LINE if $line =~ /^\s*#/;      # header / comment rows
    next LINE unless $line =~ /\S/;     # blank rows

    my @fields = split ' ', $line;
    next LINE if @fields < 12;          # not a complete result row

    my ($name, $cmax_pos, $has_sp) = @fields[0, 2, 9];

    if ($has_sp eq 'Y') {
        # NOTE(review): the range end is derived from the position column that
        # follows Cmax (third column), as in the original script — confirm
        # that this, and not the Ymax position, is the intended cleavage site.
        print {$out_fh} "$name\t1-", ($cmax_pos - 1), "\n";
    }
    elsif ($has_sp eq 'N') {
        print {$out_fh} "$name\t-\n";
    }
    else {
        # Never emit a half-written row for an unexpected flag value.
        warn "Skipping $name: unexpected value '$has_sp' in column '?'\n";
    }
}

close($in_fh)
    or die $!
        ? "Error reading $signalp_output.gz: $!\n"
        : "gzip -dcf $signalp_output.gz exited with status " . ($? >> 8) . "\n";
# Buffered write errors only surface at close, so check it.
close($out_fh) or die "Cannot close $parsed_output: $!\n";

# Exit status 0 signals success to the calling shell / pipeline.
exit 0;