-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathK-get_fasta_from_ids.pl
120 lines (78 loc) · 1.97 KB
/
K-get_fasta_from_ids.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/perl
# Author      : Ksenia
# Date        : Wed Aug  8 20:42:38 UTC 2007
# Description : Given an identifier, find a sequence in the database file
# Feed a file containing ids and a database to the script
use strict;
use warnings;
use Getopt::Long;
# Usage text printed when the command line cannot be parsed or a required
# option is missing. Only -i and -f are required; without -o the selected
# records are written to standard output.
my $usage = "Usage: perl K-get_fasta_from_ids.pl <options>\n
Necessary options for the script to run:
-i|--ids location of the file with sequence ids
-f|--fasta location of the fasta file
Optional:
-o|--output location of the output file (default: standard output)
";

my ($fasta, $ids_file, $output_file);

# GetOptions returns false (after warning on STDERR) when it meets an unknown
# or malformed option; stop with the usage text instead of carrying on with a
# half-parsed command line.
GetOptions(
    'f|fasta:s'  => \$fasta,
    'i|ids:s'    => \$ids_file,
    'o|output:s' => \$output_file,
) or die $usage;

# The ':s' specifier makes the option value optional, so a bare "-f", "-i" or
# "-o" stores an empty string. An empty file name can never be opened, so
# reject it here rather than failing later with a less helpful message.
for my $option_value ($fasta, $ids_file, $output_file) {
    die $usage if defined($option_value) and $option_value eq '';
}

die $usage unless ( defined($fasta) and defined($ids_file) );
# Set of requested sequence ids (normalized id => 1), filled from the ids file.
my %ids;

open (my $ids_fh, "<", $ids_file)
    or die "cannot open ids file '$ids_file': $!";

# Output handle. It is only opened when an output file was requested and
# stays undefined otherwise.
my $FILEOUT;
if (defined($output_file)) {
    open ($FILEOUT, ">", $output_file)
        or die "cannot open output file '$output_file' for writing: $!";
}

# Each line of the ids file contributes its first whitespace-separated token.
while (my $ids_line = <$ids_fh>) {
    chomp $ids_line;

    # split ' ' skips leading whitespace, so an indented id is still picked
    # up; a blank or whitespace-only line yields no token at all.
    my ($id) = split(' ', $ids_line);
    next unless defined $id;

    # Apply the same clean-up the FASTA reader applies to its record ids
    # (leading '>', a comma, the Unigene "gnl|UG|" prefix) so that both
    # sides are compared in the same form.
    $id =~ s/\>//;
    $id =~ s/,//;
    $id =~ s/gnl\|UG\|//;

    # Nothing left after clean-up (e.g. a line holding only ">"): not an id.
    next if $id eq '';

    $ids{$id} = 1;
}
close $ids_fh;
# Records read from the FASTA file, keyed by the normalized record id:
#   %sequence : id => all sequence lines of the record joined together
#   %header   : id => remainder of the header line after the id
# NOTE: if an id occurs more than once, the last header is kept while the
# sequence lines of all occurrences are appended to each other.
my %sequence;
my %header;

# Id of the record currently being read; undefined until the first header.
my $current_id;

open (my $fasta_fh, "<", $fasta)
    or die "cannot open fasta file '$fasta': $!";
while ( my $line = <$fasta_fh> ) {
    chomp $line;
    # Drop the carriage return left behind by CRLF line endings so it does
    # not end up inside the stored sequence.
    $line =~ s/\r\z//;

    # A header is a line that STARTS with '>'; a '>' elsewhere in a line
    # must not start a new record.
    if ($line =~ m/^>/) {
        my @header_fields = split (/\s/, $line);
        $current_id = shift(@header_fields);
        $current_id =~ s/\>//;
        $current_id =~ s/,//;
        #for ORF Predictor
        #$current_id =~ s/ .+//;
        #use for Unigene only
        $current_id =~ s/gnl\|UG\|//;
        $header{$current_id} = join(" ", @header_fields);
    }
    elsif (defined $current_id) {
        # '.=' creates the entry on first use and appends afterwards, without
        # depending on the truthiness of what has been collected so far.
        $sequence{$current_id} .= $line;
    }
    # Lines before the first header belong to no record and are ignored.
}
close $fasta_fh;
# Print every stored record whose id was requested, as
#   >id description
#   sequence
# Records go to the output file when one was given, otherwise to standard
# output.
my $out_fh = defined($output_file) ? $FILEOUT : \*STDOUT;

# Sorted ids make the output order reproducible between runs; plain hash
# iteration order is not.
for my $id (sort keys %sequence) {
    next unless $ids{$id};

    # A record stored without a header entry is printed with an empty
    # description instead of triggering an "uninitialized value" warning.
    my $description = defined($header{$id}) ? $header{$id} : '';

    print {$out_fh} ">", $id, " ", $description, "\n", $sequence{$id}, "\n"
        or die "cannot write output: $!";
}

# Buffered write errors only surface at close, so check it for the file.
if (defined($output_file)) {
    close $FILEOUT
        or die "cannot close output file '$output_file': $!";
}
__END__