-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcode_convertChrIdentifier.pl
86 lines (76 loc) · 1.71 KB
/
code_convertChrIdentifier.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/perl -w
use strict;
use Getopt::Long;
use Pod::Usage;
use File::Basename;
#==========
#==========
my $fasta;
my $relationship;
my $out_prefix;
my $help;
GetOptions(
'fasta|f=s' => \$fasta,
'relationship|r=s' => \$relationship,
'out_prefix|o=s' => \$out_prefix,
'help' => \$help
) or pod2usage(-verbose => 0);
pod2usage(-verbose => 0) if $help;
=head1 SYNOPSIS
[Example] perl code_convertChrIdentifier.pl --fasta species.fasta --relationship relationship --out_prefix species.new.fasta
Options:
--fasta|f <STR> reference genome fasta [mandatory]
--relationship|r <STR> how to convert chromosome identifier
original chromosome ID must be the chromosome ID in reference genome fasta.
file format:
<original chromosome ID>\t<new chromosome ID>
e.g.
GWHAMMN00000001 1
--out_prefix|o <STR> prefix of out file
--help or -h output help message
=cut
open(IN,$relationship);
my %hash;
while(<IN>)
{
chomp;
my ($old,$new)=split(/\s+/,$_);
$hash{$old}=$new;
}
close IN;
if($fasta =~ /gz$/)
{
open(IN,"gzip -dc $fasta | ") or die $!;
}else
{
open(IN,$fasta) or die $!;
}
my $out_file=$out_prefix.".fasta.gz";
open(OUT,"|gzip -c > $out_file") or die $!;
my $line=<IN>;
while($line)
{
chomp($line);
if($line =~ /^>/)
{
my ($fasta_name)=split(/\s+/,$line);
$fasta_name=~s/>//;
my $new_name=$hash{$fasta_name};
print OUT ">$new_name\n";
# print ">$fasta_name\t$new_name\n";
$line=<IN>;
while($line)
{
if($line =~ /^>/)
{
last;
}else
{
print OUT "$line";
$line=<IN>;
}
}
}
}
close IN;
close OUT;