-
Notifications
You must be signed in to change notification settings - Fork 17
/
Varinfo_gds.R
59 lines (45 loc) · 1.65 KB
/
Varinfo_gds.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
rm(list=ls())
gc()
##########################################################################
# Input
##########################################################################
### DB split information
file_DBsplit <- "/path_to_the_file/FAVORdatabase_chrsplit.csv"
### Targeted GDS
dir_geno <- "/path_to_the_GDS_file/"
gds_file_name_1 <- "freeze.5.chr"
gds_file_name_2 <- ".pass_and_fail.gtonly.minDP0.gds"
### output
output_path <- "/path_to_the_output_file/"
### input array id from batch file
chr <- as.numeric(commandArgs(TRUE)[1])
###########################################################################
# Main Function
###########################################################################
### make directory
system(paste0("mkdir ",output_path,"chr",chr))
### R package
library(gdsfmt)
library(SeqArray)
library(SeqVarTools)
### chromosome number
## read info
DB_info <- read.csv(file_DBsplit,header=TRUE)
DB_info <- DB_info[DB_info$Chr==chr,]
## open GDS
gds.path <- paste0(dir_geno,gds_file_name_1,chr,gds_file_name_2)
genofile <- seqOpen(gds.path)
CHR <- as.numeric(seqGetData(genofile, "chromosome"))
position <- as.integer(seqGetData(genofile, "position"))
REF <- as.character(seqGetData(genofile, "$ref"))
ALT <- as.character(seqGetData(genofile, "$alt"))
VarInfo_genome <- paste0(CHR,"-",position,"-",REF,"-",ALT)
seqClose(genofile)
## Generate VarInfo
for(kk in 1:dim(DB_info)[1])
{
print(kk)
VarInfo <- VarInfo_genome[(position>=DB_info$Start_Pos[kk])&(position<=DB_info$End_Pos[kk])]
VarInfo <- data.frame(VarInfo)
write.csv(VarInfo,paste0(output_path,"chr",chr,"/VarInfo_chr",chr,"_",kk,".csv"),quote=FALSE,row.names = FALSE)
}