Skip to content

Commit

Permalink
Merge pull request #16 from 4dn-dcic/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
SooLee authored Apr 5, 2017
2 parents e0c67d0 + 1d36772 commit ccfb1ce
Show file tree
Hide file tree
Showing 7 changed files with 24 additions and 7 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: Rpairix
Title: Rpairix
Version: 0.1.0
Version: 0.1.1
Authors@R: person("Soo", "Lee", email = "duplexa@gmail.com", role = c("aut", "cre"))
Description: R binder for pairix, a tool for querying a pair of genomic ranges in a pairs file (pairix-indexed bgzipped text file)
Depends:
Expand Down
1 change: 1 addition & 0 deletions R/px_build_index.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ px_build_index<-function(filename, preset='', sc=0, bc=0, ec=0, sc2=0, bc2=0, ec
if(out[[13]][1] == -2) { message("Can't recognize preset."); return(-1); }
if(out[[13]][1] == -3) { message("Was bgzip used to compress this file?"); return(-1); }
if(out[[13]][1] == -4) { message("The index file exists. Please use force=TRUE to overwrite"); return(-1); }
if(out[[13]][1] == -5) { message("Can't recognize file type, with no preset specified."); return(-1); }
return(0);
}

4 changes: 2 additions & 2 deletions R/px_query.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#'
#' ## 2D-indexed file
#' filename = system.file(".","test_4dn.pairs.gz", package="Rpairix")
#' querystr = c("chr10|chr20","chr2|chr2")
#' querystr = c("chr10|chr20","chr22|chr22")
#' res = px_query(filename, querystr)
#' print(res)
#'
Expand Down Expand Up @@ -48,7 +48,7 @@
#'
#' ## 1D-indexed file
#' filename = system.file(".","SRR1171591.variants.snp.vqsr.p.vcf.gz", package="Rpairix")
#' querystr = 'chr10'
#' querystr = 'chr10|5000000-20000000'
#' res = px_query(filename, querystr)
#' print(res)
#'
Expand Down
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ R --no-site-file --no-environ --no-save --no-restore CMD INSTALL --install-tests
To install a specific version,
```
library(devtools)
install_url("https://github.com/4dn-dcic/Rpairix/archive/0.1.0.zip")
install_url("https://github.com/4dn-dcic/Rpairix/archive/0.1.1.zip")
```


Expand Down Expand Up @@ -78,6 +78,7 @@ sc2 second sequence (chromosome) column index (1-based). Zero (0) means not spec
* `line_skip` : number of lines to skip in the beginning. (default 0)
* `force` : If TRUE, overwrite existing index file. If FALSE, do not overwrite unless the index file is older than the bgzipped file. (default FALSE)
* An index file sometextfile.gz.px2 will be created.
* When neither `preset` nor `sc` (and `bc`) is given, the following file extensions are automatically recognized: `gff.gz`, `bed.gz`, `sam.gz`, `vcf.gz`, `psltbl.gz` (1D-indexing), and `pairs.gz` (2D-indexing).

### Querying
```
Expand Down Expand Up @@ -155,6 +156,7 @@ px_check_dim(filename)
> # indexing
> px_build_index(filename, sc=2, bc=3, ec=3, sc2=4, bc2=5, ec2=5, force=TRUE)
> px_build_index(filename, 'pairs', force=TRUE) # equivalent to the above line
> px_build_index(filename, force=TRUE) # equivalent to the above line, since the file extension pairs.gz is recognized.
>
> # single-query
> querystr = "chr10:1-3000000|chr20"
Expand Down Expand Up @@ -239,6 +241,9 @@ Individual R functions are written and documented in `R/`. The `src/rpairixlib.c


## Version history
### 0.1.1
* `px_build_index`: When neither `preset` nor a custom set of columns is given, file extensions are automatically recognized for indexing.

### 0.1.0
* `px_build_index` is added. (Now indexing can be done using Rpairix as well as querying.)

Expand Down
2 changes: 1 addition & 1 deletion Rpairix.Rproj
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version: 0.1.0
Version: 0.1.1

RestoreWorkspace: No
SaveWorkspace: No
Expand Down
Binary file modified inst/test_4dn.pairs.gz.px2
Binary file not shown.
15 changes: 13 additions & 2 deletions src/rpairixlib.c
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,18 @@ void build_index(char **pinputfilename, char **ppreset, int *psc, int *pbc, int
if ( bgzf_is_bgzf(*pinputfilename)!=1 ) *pflag = -3;
else {
ti_conf_t conf;
if (strcmp(*ppreset, "") == 0 && *psc != 0 && *pbc != 0){
if (strcmp(*ppreset, "") == 0 && *psc == 0 && *pbc == 0){
int l = strlen(*pinputfilename);
int strcasecmp(const char *s1, const char *s2);
if (l>=7 && strcasecmp(*pinputfilename+l-7, ".gff.gz") == 0) conf = ti_conf_gff;
else if (l>=7 && strcasecmp(*pinputfilename+l-7, ".bed.gz") == 0) conf = ti_conf_bed;
else if (l>=7 && strcasecmp(*pinputfilename+l-7, ".sam.gz") == 0) conf = ti_conf_sam;
else if (l>=7 && strcasecmp(*pinputfilename+l-7, ".vcf.gz") == 0) conf = ti_conf_vcf;
else if (l>=10 && strcasecmp(*pinputfilename+l-10, ".psltbl.gz") == 0) conf = ti_conf_psltbl;
else if (l>=9 && strcasecmp(*pinputfilename+l-9, ".pairs.gz") == 0) conf = ti_conf_pairs;
else *pflag = -5; // file extension not recognized and no preset specified
}
else if (strcmp(*ppreset, "") == 0 && *psc != 0 && *pbc != 0){
conf.sc = *psc;
conf.bc = *pbc;
conf.ec = *pec;
Expand All @@ -376,7 +387,7 @@ void build_index(char **pinputfilename, char **ppreset, int *psc, int *pbc, int
else if (strcmp(*ppreset, "old_merged_nodups") == 0) conf = ti_conf_old_merged_nodups;
else *pflag = -2; // wrong preset

if (*pflag != -2 ) *pflag= ti_index_build(*pinputfilename, &conf); // -1 if failed
if (*pflag != -2 && *pflag != -5 ) *pflag= ti_index_build(*pinputfilename, &conf); // -1 if failed
}
}
}
Expand Down

0 comments on commit ccfb1ce

Please sign in to comment.