diff --git a/DESCRIPTION b/DESCRIPTION index fcb0b66..cf81aaf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Rpairix Title: Rpairix -Version: 0.1.0 +Version: 0.1.1 Authors@R: person("Soo", "Lee", email = "duplexa@gmail.com", role = c("aut", "cre")) Description: R binder for pairix, tool for querying a pair of genomic ranges in a pairs file (pairix-indexed bgzipped text file) Depends: diff --git a/R/px_build_index.R b/R/px_build_index.R index b04cdce..1c4fd8f 100644 --- a/R/px_build_index.R +++ b/R/px_build_index.R @@ -40,6 +40,7 @@ px_build_index<-function(filename, preset='', sc=0, bc=0, ec=0, sc2=0, bc2=0, ec if(out[[13]][1] == -2) { message("Can't recognize preset."); return(-1); } if(out[[13]][1] == -3) { message("Was bgzip used to compress this file?"); return(-1); } if(out[[13]][1] == -4) { message("The index file exists. Please use force=TRUE to overwrite"); return(-1); } + if(out[[13]][1] == -5) { message("Can't recognize file type, with no preset specified."); return(-1); } return(0); } diff --git a/R/px_query.R b/R/px_query.R index ec3c8c5..a80860e 100644 --- a/R/px_query.R +++ b/R/px_query.R @@ -15,7 +15,7 @@ #' #' ## 2D-indexed file #' filename = system.file(".","test_4dn.pairs.gz", package="Rpairix") -#' querystr = c("chr10|chr20","chr2|chr2") +#' querystr = c("chr10|chr20","chr22|chr22") #' res = px_query(filename, querystr) #' print(res) #' @@ -48,7 +48,7 @@ #' #' ## 1D-indexed file #' filename = system.file(".","SRR1171591.variants.snp.vqsr.p.vcf.gz", package="Rpairix") -#' querystr = 'chr10' +#' querystr = 'chr10|5000000-20000000' #' res = px_query(filename, querystr) #' print(res) #' diff --git a/README.md b/README.md index 730f849..bbe5b9a 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ R --no-site-file --no-environ --no-save --no-restore CMD INSTALL --install-tests To install a specific version, ``` library(devtools) -install_url("https://github.com/4dn-dcic/Rpairix/archive/0.1.0.zip") 
+install_url("https://github.com/4dn-dcic/Rpairix/archive/0.1.1.zip") ``` @@ -78,6 +78,7 @@ sc2 second sequence (chromosome) column index (1-based). Zero (0) means not spec * `line_skip` : number of lines to skip in the beginning. (default 0) * `force` : If TRUE, overwrite existing index file. If FALSE, do not overwrite unless the index file is older than the bgzipped file. (default FALSE) * An index file sometextfile.gz.px2 will be created. +* When neither `preset` nor `sc` (and `bc`) is given, the following file extensions are automatically recognized: `gff.gz`, `bed.gz`, `sam.gz`, `vcf.gz`, `psltbl.gz` (1D-indexing), and `pairs.gz` (2D-indexing). ### Querying ``` @@ -155,6 +156,7 @@ px_check_dim(filename) > # indexing > px_build_index(filename, sc=2, bc=3, ec=3, sc2=4, bc2=5, ec2=5, force=TRUE) > px_build_index(filename, 'pairs', force=TRUE) # equivalent to the above line +> px_build_index(filename, force=TRUE) # equivalent to the above line, since file extension pairs.gz is recognized. > > # single-query > querystr = "chr10:1-3000000|chr20" @@ -239,6 +241,9 @@ Individual R functions are written and documented in `R/`. The `src/rpairixlib.c ## Version history +### 0.1.1 +* `px_build_index`: When neither `preset` nor a custom set of columns is given, file extensions are automatically recognized for indexing. + ### 0.1.0 * `px_build_index` is added. (Now indexing can be done using Rpairix as well as querying.) 
diff --git a/Rpairix.Rproj b/Rpairix.Rproj index 82d70a3..b4c7648 100644 --- a/Rpairix.Rproj +++ b/Rpairix.Rproj @@ -1,4 +1,4 @@ -Version: 0.1.0 +Version: 0.1.1 RestoreWorkspace: No SaveWorkspace: No diff --git a/inst/test_4dn.pairs.gz.px2 b/inst/test_4dn.pairs.gz.px2 index 8e230c2..e2b0414 100644 Binary files a/inst/test_4dn.pairs.gz.px2 and b/inst/test_4dn.pairs.gz.px2 differ diff --git a/src/rpairixlib.c b/src/rpairixlib.c index 04d078d..fc3bb82 100644 --- a/src/rpairixlib.c +++ b/src/rpairixlib.c @@ -355,7 +355,18 @@ void build_index(char **pinputfilename, char **ppreset, int *psc, int *pbc, int if ( bgzf_is_bgzf(*pinputfilename)!=1 ) *pflag = -3; else { ti_conf_t conf; - if (strcmp(*ppreset, "") == 0 && *psc != 0 && *pbc != 0){ + if (strcmp(*ppreset, "") == 0 && *psc == 0 && *pbc == 0){ + int l = strlen(*pinputfilename); + int strcasecmp(const char *s1, const char *s2); + if (l>=7 && strcasecmp(*pinputfilename+l-7, ".gff.gz") == 0) conf = ti_conf_gff; + else if (l>=7 && strcasecmp(*pinputfilename+l-7, ".bed.gz") == 0) conf = ti_conf_bed; + else if (l>=7 && strcasecmp(*pinputfilename+l-7, ".sam.gz") == 0) conf = ti_conf_sam; + else if (l>=7 && strcasecmp(*pinputfilename+l-7, ".vcf.gz") == 0) conf = ti_conf_vcf; + else if (l>=10 && strcasecmp(*pinputfilename+l-10, ".psltbl.gz") == 0) conf = ti_conf_psltbl; + else if (l>=9 && strcasecmp(*pinputfilename+l-9, ".pairs.gz") == 0) conf = ti_conf_pairs; + else *pflag = -5; // file extension not recognized and no preset specified + } + else if (strcmp(*ppreset, "") == 0 && *psc != 0 && *pbc != 0){ conf.sc = *psc; conf.bc = *pbc; conf.ec = *pec; @@ -376,7 +387,7 @@ void build_index(char **pinputfilename, char **ppreset, int *psc, int *pbc, int else if (strcmp(*ppreset, "old_merged_nodups") == 0) conf = ti_conf_old_merged_nodups; else *pflag = -2; // wrong preset - if (*pflag != -2 ) *pflag= ti_index_build(*pinputfilename, &conf); // -1 if failed + if (*pflag != -2 && *pflag != -5 ) *pflag= ti_index_build(*pinputfilename, 
&conf); // -1 if failed } } }