forked from barricklab/barricklab
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgene_gc_length.pl
executable file
·112 lines (75 loc) · 2.13 KB
/
gene_gc_length.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/env perl -w
###
# Pod Documentation
###
=head1 NAME
batch_run.pl
=head1 SYNOPSIS
Usage: gene_gc_length.pl "[command]"
Run a command within each directory contained within the current directory.
Enclose the command in quotes. All appearances of '#d' in the command will
be replaced with the name of the current directory.
=head1 DESCRIPTION
=over
=item B<-p> <pattern>
Only execute in directories that match this regular expression.
Directories beginning with a period '.' or underscore '_' are always ignored.
=item B<-t>
Test mode. Print commands that would have been run.
=item B<-c>
Alternate way of passing the command to be run.
=back
=head1 AUTHOR
Jeffrey Barrick
=head1 COPYRIGHT
Copyright 2006. All rights reserved.
=cut
###
# End Pod Documentation
###
use strict;
use FindBin;
use lib $FindBin::Bin;
use Data::Dumper;
use Bio::SeqIO;
#Get options
use Getopt::Long;
use Pod::Usage;
my ($help, $man);
my ($input_file, $output_file);
pod2usage(1) if (scalar @ARGV == 0);
GetOptions(
'help|?' => \$help, 'man' => \$man,
'input|i=s' => \$input_file,
'output|o=s' => \$output_file,
) or pod2usage(2);
pod2usage(1) if $help;
pod2usage(-exitstatus => 0, -verbose => 2) if $man;
pod2usage(1) if (!$input_file);
pod2usage(1) if (!$output_file);
open OUT, ">$output_file";
print OUT join(",", ("gene", "length", "gccontent") ) . "\n";
my $in = Bio::SeqIO->new( -file => "$input_file", -format => "GENBANK");
while (my $seq_object = $in->next_seq) {
for my $feat_object ($seq_object->get_SeqFeatures) {
my @line_list = ();
if ($feat_object->primary_tag eq "gene") {
push @line_list, $feat_object->get_tag_values('locus_tag');
#Calculate gene length
my $seq = $feat_object->seq()->seq;
#print $seq . "\n";
push @line_list, length $seq;
my $gc = 0;
#Calculate GC content
my @seq_chars = split //, $seq;
for my $c (@seq_chars) {
if ( ("\U$c" eq "G") || ("\U$c" eq "C") ) {
$gc++;
}
}
$gc /= length $seq;
push @line_list, $gc;
print OUT join(",", @line_list) . "\n";
}
}
}