-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathdrcln
executable file
·126 lines (94 loc) · 6.92 KB
/
drcln
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/env Rscript
library(optparse)
## Start-up banner (block art plus tag line). It is only defined here; the
## script emits it later with message(dr.str).
dr.str = "
▓█████▄ ██▀███ ██ ██▓ ▄████▄ ██▓ ▓█████ ▄▄▄ ███▄ █
██▀ ██▌ ▓██ ██ ██ ██ ██▀ ▀█ ▓██▒ ▓█ ▀ ████▄ ██ ▀█ █
░██ █▌ ▓██ ░▄█ ██ ██ ▓█ ▄ ██░ ░███ ██ ▀█▄ ██ ▀█ ██▒
░▓█▄ ▌ ▒██▀▀█▄ ░ ▐██▓ ▒▓▓▄ ▄██▒ ██░ ░▓█ ▄ ██▄▄▄▄█ ██▒ ▐▌██▒
░▒████▓ ░██▓ ██ ░ ██▒ ▓███▀ ░░█████ ▒█████▒ █ █▒ ██░ ▓██░
▒ ▓ ▒ ░ ▓ ░▒▓░ ██ ░ ░▒ ▒ ░░ ▒░▓ ░░░ ▒░ ░▒▒ ▓▒█░░ ▒░ ▒ ▒
░ ▒ ▒ ░▒ ░ ░ ░░▒░ ░ ▒ ░ ░ ▒ ░ ░ ░ ░ ▒ ▒▒ ░░ ░░ ░ ▒░
░ ░ ░ ░░ ░ ░ ░░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░
░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░
░ ░ ░ ░ ░ ░ ░ ░ ░ ░
(Let's dryclean the genomes!)\n"
## ---------------------------------------------------------------------------
## Command-line interface
##
## Options are parsed only when no `opt` object exists yet, so a caller that
## has already prepared an `opt` list can reuse the rest of the script as-is.
## When parsing happens here, the run is also recorded in <outdir>/cmd.args
## (plain text) and <outdir>/cmd.args.rds (the parsed option list).
## ---------------------------------------------------------------------------
opt_from_cli <- !exists('opt')
if (opt_from_cli)
{
    option_list = list(
        make_option(c("--mode"), type = "character", default = "coverage", help = "Mode of operation: 'pon' or 'coverage'. Set to 'pon' for PON generation and 'coverage' for normalizing a sample using existing PON"),
        make_option(c("-p", "--pon"), type = "character", default = NULL, help = "path to the existing Panel Of Normal (PON) saved as .rds"),
        make_option(c("-i", "--input"), type = "character", help = "path to the coverage file in GRanges format saved as .rds"),
        make_option(c("-C", "--center"), type = "logical", default = TRUE, help = "whether to center the coverage before drycleaning"),
        make_option(c("-s", "--cbs"), type = "logical", default = FALSE, help = "whether to perform cbs on the drycleaned coverage"),
        ## A significance level is fractional, so it must be parsed as a
        ## double; an integer option cannot hold 1e-5 (it collapses to 0).
        make_option(c("-n", "--cnsignif"), type = "double", default = 1e-5, help = "the significance levels for the test to accept change-points in cbs"),
        make_option(c("-c", "--cores"), type = "integer", default = 10, help = "number of cores to use"),
        make_option(c("-w", "--wholeGenome"), default = TRUE, type = "logical", help = "whether whole genome is being used"),
        make_option(c("-b", "--blacklist"), default = FALSE, type = "logical", help = "whether there are blacklisted makers"),
        make_option(c("-l", "--blacklist_path"), default = NA, type = "character", help = "if --blacklist == TRUE, path to a GRanges object marking if each marker is set to be excluded or not"),
        make_option(c("-g", "--germline.filter"), default = FALSE, type = "logical", help = "if PON based germline filter is to be used for removing some common germline events, if set to TRUE, give path to germline annotated file"),
        make_option(c("-f", "--germline.file"), default = NA, type = "character", help = "path to file annotated with germline calls, if germline.filter == TRUE"),
        make_option(c("-F", "--field"), type = "character", default = 'reads.corrected', help = "field name in GRanges metadata to use for drycleaning"),
        make_option(c("-B", "--build"), type = "character", default = 'hg19', help = "hg19/hg38 build for human samples"),
        ## Declared logical (like the other TRUE/FALSE switches) so the
        ## default stays a real FALSE instead of a character value.
        make_option(c("-T", "--testing"), type = "logical", default = FALSE, help = "DO NOT CHANGE"),
        make_option(c("--normal_vector"), type = "character", default = NULL, help = "if mode = 'pon', path to a vector containing normal coverages in GRanges format saved as .rds"),
        make_option(c("--field_pon"), type = "character", default = 'reads.corrected', help = "field name in GRanges metadata of normal samples to use for pon generation"),
        make_option(c("-o", "--outdir"), type = "character", default = './', help = "output directory"),
        make_option(c("-h", "--help"), action="store_true", default=FALSE, help="show this help message and exit")
    )
    parseobj = OptionParser(option_list=option_list, add_help_option=FALSE)
    opt = parse_args(parseobj)

    ## Print a traceback on error and then terminate with a non-zero status.
    ## With a custom handler installed, a non-interactive R session does not
    ## halt by itself: it would carry on with the next top-level expression
    ## and finally exit with status 0 even though stop() was called.
    options(error = function() {
        traceback(2)
        if (!interactive()) {
            quit(save = "no", status = 1, runLast = FALSE)
        }
    })
}

## The output directory has to exist before anything is written into it
## (run record below, results later). dir.create() is used instead of
## shelling out to `mkdir -p`, so paths with spaces or shell metacharacters
## are handled literally.
if (!dir.exists(opt$outdir) &&
    !dir.create(opt$outdir, recursive = TRUE, showWarnings = FALSE)) {
    stop("Could not create output directory: ", opt$outdir)
}

if (opt_from_cli)
{
    ## keep record of run
    opt_values <- vapply(opt, function(x) paste(x, collapse = ','), character(1))
    writeLines(
        paste0('--', names(opt), ' ', opt_values, collapse = ' '),
        file.path(opt$outdir, 'cmd.args')
    )
    saveRDS(opt, file.path(opt$outdir, 'cmd.args.rds'))
}
## ---------------------------------------------------------------------------
## Attach runtime packages quietly, show the banner, then validate --mode.
## ---------------------------------------------------------------------------
suppressWarnings(
    suppressPackageStartupMessages(
        library(dryclean)
    )
)

## Banner is shown right after dryclean is attached.
message(dr.str)

suppressWarnings(
    suppressPackageStartupMessages(
        library(dplyr)
    )
)
suppressWarnings(
    suppressPackageStartupMessages(
        library(GenomicRanges)
    )
)

## Only the two documented modes are accepted.
mode_is_known <- opt$mode == "coverage" | opt$mode == "pon"
if (!mode_is_known) {
    stop("Invalid mode: set --mode flag as 'pon' or 'coverage'")
}
## ---------------------------------------------------------------------------
## Mode 'pon': build a new panel of normals from the samples listed in the
## object stored at --normal_vector.
## ---------------------------------------------------------------------------
if (opt$mode == "pon") {
    if (is.null(opt$normal_vector)) {
        stop("Missing the path to normal_vector (--normal_vector flag). Run ./drcln -h for help")
    }

    normal.vector <- readRDS(opt$normal_vector)

    ## file.path() inserts the separator, so the PON lands inside --outdir
    ## even when the directory is given without a trailing slash
    ## (paste0() would have produced e.g. 'outpon.rds' for '-o out').
    ## NOTE(review): presumably pon$new() writes the PON to `pon_path` when
    ## save_pon = TRUE -- confirm against the dryclean package.
    pon_object <- pon$new(
        normal_vector = normal.vector,
        pon_path = file.path(opt$outdir, 'pon.rds'),
        create_new_pon = TRUE,
        save_pon = TRUE,
        field = opt$field_pon,
        build = opt$build,
        num.cores = opt$cores
    )
}
## ---------------------------------------------------------------------------
## Mode 'coverage': normalize one coverage sample against an existing PON and
## save the result as <outdir>/drycleaned.cov.rds.
## ---------------------------------------------------------------------------
if (opt$mode == "coverage") {
    if (is.null(opt$input)) {
        stop("Missing coverage to normalize (--input flag). Run ./drcln -h for help")
    }
    if (is.null(opt$pon)) {
        stop("Missing path to PON (--pon flag). Run ./drcln -h for help")
    }

    pon_object <- pon$new(pon_path = opt$pon, build = opt$build)
    dryclean_object <- dryclean$new(pon = pon_object)

    ## NOTE(review): --germline.file and --wholeGenome are parsed but are not
    ## forwarded to clean(); confirm whether clean() should receive them.
    this.out <- dryclean_object$clean(
        cov = opt$input,
        center = opt$center,
        cbs = opt$cbs,
        cnsignif = opt$cnsignif,
        mc.cores = opt$cores,
        verbose = TRUE,
        use.blacklist = opt$blacklist,
        blacklist_path = opt$blacklist_path,
        germline.filter = opt$germline.filter,
        field = opt$field,
        testing = opt$testing
    )

    ## file.path() keeps the result inside --outdir even when the directory
    ## is given without a trailing slash (paste0() would have glued the file
    ## name onto the directory name).
    saveRDS(this.out, file.path(opt$outdir, 'drycleaned.cov.rds'))
}

message('Giddy Up!')