-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathwoland-bed.pl
executable file
·102 lines (87 loc) · 2.83 KB
/
woland-bed.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#####################################################################################################################################
## WOLAND Beta 1.01 (09-30-2017)
## woland-bed.pl
##
## WOLAND is a multiplatform tool to analyze point mutation patterns using resequencing data from any organism or cell.
##
## For more details please read README file.
##
######################################################################################################################################
#! /usr/bin/perl
use List::Util qw(sum); #module for sum of chromosome coordinates
use List::MoreUtils qw(uniq);
use strict;
use warnings;
use Getopt::ArgParse;
our $REVISION = '$Revision: $';
our $DATE = '$Date: 2017-09-30 00:11:04 -0800 (Sat, 30 Sep 2017) $';
our $AUTHOR = '$Author: Tiago A. de Souza <tiagoantonio@gmail.com> $';
our (@chrbed,@pos1,@pos2, @sumlength,@uniquechr);
our $bedfile;
# parse_bedfile($args)
#
# Loads the tab-separated BED file named by $args->bed_file and fills the
# file-level state used by the rest of the script:
#   $bedfile   - the path that was read (the main section uses it to name
#                the output profile)
#   @chrbed    - column 1 (chromosome name) of every retained row
#   @pos1      - column 2 of every retained row
#   @pos2      - column 3 of every retained row
#   @uniquechr - distinct chromosome names, in order of first appearance
#   @sumlength - one total per @uniquechr entry, pushed by calculate_length()
#
# The first line of the file is always discarded, as it was before.
# NOTE(review): presumably that line is a column header - confirm, because a
# header-less BED file loses its first region.
#
# Dies if the file cannot be opened.
sub parse_bedfile{ # loading bed file
    my ($args) = @_;
    $bedfile = $args->bed_file;

    open(my $bed_fh, '<', $bedfile)
        or die "\nERROR: cannot open $bedfile: $!\n\n";
    my @bedfilearray = <$bed_fh>;
    close($bed_fh);

    # Drop the first line before splitting so the three column arrays stay
    # index-aligned. Shifting only @chrbed (the previous behaviour) paired
    # each chromosome name with the coordinates of the row above it.
    shift(@bedfilearray);

    foreach my $rawbedline (@bedfilearray){
        $rawbedline =~ s/\r?\n\z//;          # strip LF or CRLF line ending
        next if $rawbedline =~ /^\s*$/;      # ignore blank lines
        my @fields = split(/\t/, $rawbedline);
        push(@chrbed, $fields[0]);
        push(@pos1, $fields[1]);
        push(@pos2, $fields[2]);
    }

    @uniquechr = uniq @chrbed; #unique chromosome names
    foreach my $chromosome (@uniquechr){
        calculate_length($chromosome);
    }
}
# calculate_length($chromosome)
#
# Walks the file-level @chrbed / @pos1 / @pos2 arrays in parallel. Rows whose
# chromosome name equals $chromosome contribute abs(pos2 - pos1); every other
# row contributes 0. The total is appended to the file-level @sumlength.
sub calculate_length{ #absolute subtraction of pos values
    my $wanted_chr = "$_[0]";
    my @row_lengths = map {
        $chrbed[$_] eq $wanted_chr ? abs($pos2[$_] - $pos1[$_]) : 0
    } 0..$#chrbed;
    push(@sumlength, sum(@row_lengths));
}
# Command-line interface: one required option, --bed-file / -b, whose parsed
# value is read later as $args->bed_file.
my $cli_description = 'WOLAND is a multiplatform tool to analyze point mutation patterns using resequencing SNV data.
Use woland-bed.pl to calculate nucleotide length using a .BED coordinate file as input and to build <chromosome_length_profile> for other woland scripts.For more details please read README';
my $cli_epilog = 'If you used Woland in your research, we would appreciate your citation:
de Souza TA, Defelicibus A, Menck CF';

my $ap = Getopt::ArgParse->new_parser(
    prog        => 'woland-bed.pl',
    description => $cli_description,
    epilog      => $cli_epilog,
);

my %bed_file_option = (
    required => 1,
    help     => 'BED-formatted file of targeted-sequencing regions',
);
$ap->add_arg('--bed-file', '-b', %bed_file_option);

my $args = $ap->parse_args();
## main
# The argument parser already enforces that --bed-file is present, so only
# the file itself needs checking: it must exist, be a plain file and be
# readable.
unless (-r -e -f $args->bed_file){
    die sprintf("\nERROR: %s not exists or is not readable or not properly formatted. Please check file.\n\n",
        $args->bed_file);
}

# Fills @uniquechr and @sumlength, and sets $bedfile to the input path.
parse_bedfile($args);

# printing profile file: one "<chromosome>\t<total length>" line per
# distinct chromosome.
# NOTE(review): the file is opened in append mode, as before, so re-running
# on the same input adds a second copy of the profile - confirm that is wanted.
my $profile_name = "woland-bed-profile-$bedfile";
open(my $profile_fh, '>>', $profile_name)
    or die "\nERROR: cannot open $profile_name for writing: $!\n\n";

for my $i (0..$#uniquechr){
    # A zero total is written as the digit 1. The previous "\1" was an octal
    # escape and emitted the control character chr(1) instead.
    # NOTE(review): presumably the placeholder keeps downstream consumers
    # from seeing a zero length - confirm.
    my $total_length = $sumlength[$i] || 1;
    print {$profile_fh} "$uniquechr[$i]\t$total_length\n";
}

# Buffered write errors only surface at close, so check it.
close($profile_fh)
    or die "\nERROR: cannot finish writing $profile_name: $!\n\n";
exit;